Пример #1
0
/// HandleFloatingPointIV - If the loop has floating induction variable
/// then insert corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
///
void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  ConstantFP *InitValueVal =
    dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return;

  // Check IV increment. Reject this PN if increment operation is not
  // an add or increment value can not be represented by an integer.
  BinaryOperator *Incr =
    dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
  
  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::use_iterator IncrUse = Incr->use_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->use_end()) return;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->use_end()) return;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (Compare == 0 || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->use_back()))
    return;
  
  BranchInst *TheBr = cast<BranchInst>(Compare->use_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return;
  
  
  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == 0 ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return;
  
  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }
  
  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.
  
  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value and an equality or less than comparison.
    if (InitValue >= ExitValue ||
        NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
      return;
    
    uint32_t Range = uint32_t(ExitValue-InitValue);
    if (NewPred == CmpInst::ICMP_SLE) {
      // Normalize SLE -> SLT, check for infinite loop.
      if (++Range == 0) return;  // Range overflows.
    }
    
    unsigned Leftover = Range % uint32_t(IncValue);
    
    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return;
    
    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return;
    
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value and an equality or greater than comparison.
    if (InitValue >= ExitValue ||
        NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
      return;
    
    uint32_t Range = uint32_t(InitValue-ExitValue);
    if (NewPred == CmpInst::ICMP_SGE) {
      // Normalize SGE -> SGT, check for infinite loop.
      if (++Range == 0) return;  // Range overflows.
    }
    
    unsigned Leftover = Range % uint32_t(-IncValue);
    
    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return;
    
    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return;
  }
  
  const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakVH to observe whether this happens.
  WeakVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 PN->getParent()->getFirstNonPHI());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN);
  }

  // Add a new IVUsers entry for the newly-created integer PHI.
  IU->AddUsersIfInteresting(NewPHI);
}
Пример #2
0
// UnifyAllExitNodes - Unify all exit nodes of the CFG by creating a new
// BasicBlock, and converting all returns to unconditional branches to this
// new basic block.  The singular exit node is returned.
//
// If there are no return stmts in the Function, a null pointer is returned.
//
bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
  // Loop over all of the blocks in a function, tracking all of the blocks that
  // return.
  //
  std::vector<BasicBlock*> ReturningBlocks;
  std::vector<BasicBlock*> UnreachableBlocks;
  for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
    if (isa<ReturnInst>(I->getTerminator()))
      ReturningBlocks.push_back(I);
    else if (isa<UnreachableInst>(I->getTerminator()))
      UnreachableBlocks.push_back(I);

  // Then unreachable blocks.
  if (UnreachableBlocks.empty()) {
    UnreachableBlock = nullptr;
  } else if (UnreachableBlocks.size() == 1) {
    UnreachableBlock = UnreachableBlocks.front();
  } else {
    UnreachableBlock = BasicBlock::Create(F.getContext(),
                                          "UnifiedUnreachableBlock", &F);
    new UnreachableInst(F.getContext(), UnreachableBlock);

    for (std::vector<BasicBlock*>::iterator I = UnreachableBlocks.begin(),
           E = UnreachableBlocks.end(); I != E; ++I) {
      BasicBlock *BB = *I;
      BB->getInstList().pop_back();  // Remove the unreachable inst.
      BranchInst::Create(UnreachableBlock, BB);
    }
  }

  // Now handle return blocks.
  if (ReturningBlocks.empty()) {
    ReturnBlock = nullptr;
    return false;                          // No blocks return
  } else if (ReturningBlocks.size() == 1) {
    ReturnBlock = ReturningBlocks.front(); // Already has a single return block
    return false;
  }

  // Otherwise, we need to insert a new basic block into the function, add a PHI
  // nodes (if the function returns values), and convert all of the return
  // instructions into unconditional branches.
  //
  BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(),
                                               "UnifiedReturnBlock", &F);

  PHINode *PN = nullptr;
  if (F.getReturnType()->isVoidTy()) {
    ReturnInst::Create(F.getContext(), nullptr, NewRetBlock);
  } else {
    // If the function doesn't return void... add a PHI node to the block...
    PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(),
                         "UnifiedRetVal");
    NewRetBlock->getInstList().push_back(PN);
    ReturnInst::Create(F.getContext(), PN, NewRetBlock);
  }

  // Loop over all of the blocks, replacing the return instruction with an
  // unconditional branch.
  //
  for (std::vector<BasicBlock*>::iterator I = ReturningBlocks.begin(),
         E = ReturningBlocks.end(); I != E; ++I) {
    BasicBlock *BB = *I;

    // Add an incoming element to the PHI node for every return instruction that
    // is merging into this new block...
    if (PN)
      PN->addIncoming(BB->getTerminator()->getOperand(0), BB);

    BB->getInstList().pop_back();  // Remove the return insn
    BranchInst::Create(NewRetBlock, BB);
  }
  ReturnBlock = NewRetBlock;
  return true;
}
Пример #3
0
/// CleanupAndPrepareModules - Get the specified modules ready for code
/// generator testing.
///
static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test,
                                     Module *Safe) {
  // Clean up the modules, removing extra cruft that we don't need anymore...
  Test = BD.performFinalCleanups(Test);

  // If we are executing the JIT, we have several nasty issues to take care of.
  if (!BD.isExecutingJIT()) return;

  // First, if the main function is in the Safe module, we must add a stub to
  // the Test module to call into it.  Thus, we create a new function `main'
  // which just calls the old one.
  if (Function *oldMain = Safe->getFunction("main"))
    if (!oldMain->isDeclaration()) {
      // Rename it
      oldMain->setName("llvm_bugpoint_old_main");
      // Create a NEW `main' function with same type in the test module.
      Function *newMain = Function::Create(oldMain->getFunctionType(),
                                           GlobalValue::ExternalLinkage,
                                           "main", Test);
      // Create an `oldmain' prototype in the test module, which will
      // corresponds to the real main function in the same module.
      Function *oldMainProto = Function::Create(oldMain->getFunctionType(),
                                                GlobalValue::ExternalLinkage,
                                                oldMain->getName(), Test);
      // Set up and remember the argument list for the main function.
      std::vector<Value*> args;
      for (Function::arg_iterator
             I = newMain->arg_begin(), E = newMain->arg_end(),
             OI = oldMain->arg_begin(); I != E; ++I, ++OI) {
        I->setName(OI->getName());    // Copy argument names from oldMain
        args.push_back(I);
      }

      // Call the old main function and return its result
      BasicBlock *BB = BasicBlock::Create(Safe->getContext(), "entry", newMain);
      CallInst *call = CallInst::Create(oldMainProto, args.begin(), args.end(),
                                        "", BB);

      // If the type of old function wasn't void, return value of call
      ReturnInst::Create(Safe->getContext(), call, BB);
    }

  // The second nasty issue we must deal with in the JIT is that the Safe
  // module cannot directly reference any functions defined in the test
  // module.  Instead, we use a JIT API call to dynamically resolve the
  // symbol.

  // Add the resolver to the Safe module.
  // Prototype: void *getPointerToNamedFunction(const char* Name)
  Constant *resolverFunc =
    Safe->getOrInsertFunction("getPointerToNamedFunction",
                    PointerType::getUnqual(Type::getInt8Ty(Safe->getContext())),
                    PointerType::getUnqual(Type::getInt8Ty(Safe->getContext())),
                       (Type *)0);

  // Use the function we just added to get addresses of functions we need.
  for (Module::iterator F = Safe->begin(), E = Safe->end(); F != E; ++F) {
    if (F->isDeclaration() && !F->use_empty() && &*F != resolverFunc &&
        !F->isIntrinsic() /* ignore intrinsics */) {
      Function *TestFn = Test->getFunction(F->getName());

      // Don't forward functions which are external in the test module too.
      if (TestFn && !TestFn->isDeclaration()) {
        // 1. Add a string constant with its name to the global file
        Constant *InitArray = ConstantArray::get(F->getContext(), F->getName());
        GlobalVariable *funcName =
          new GlobalVariable(*Safe, InitArray->getType(), true /*isConstant*/,
                             GlobalValue::InternalLinkage, InitArray,
                             F->getName() + "_name");

        // 2. Use `GetElementPtr *funcName, 0, 0' to convert the string to an
        // sbyte* so it matches the signature of the resolver function.

        // GetElementPtr *funcName, ulong 0, ulong 0
        std::vector<Constant*> GEPargs(2,
                     Constant::getNullValue(Type::getInt32Ty(F->getContext())));
        Value *GEP =
                ConstantExpr::getGetElementPtr(funcName, &GEPargs[0], 2);
        std::vector<Value*> ResolverArgs;
        ResolverArgs.push_back(GEP);

        // Rewrite uses of F in global initializers, etc. to uses of a wrapper
        // function that dynamically resolves the calls to F via our JIT API
        if (!F->use_empty()) {
          // Create a new global to hold the cached function pointer.
          Constant *NullPtr = ConstantPointerNull::get(F->getType());
          GlobalVariable *Cache =
            new GlobalVariable(*F->getParent(), F->getType(), 
                               false, GlobalValue::InternalLinkage,
                               NullPtr,F->getName()+".fpcache");

          // Construct a new stub function that will re-route calls to F
          const FunctionType *FuncTy = F->getFunctionType();
          Function *FuncWrapper = Function::Create(FuncTy,
                                                   GlobalValue::InternalLinkage,
                                                   F->getName() + "_wrapper",
                                                   F->getParent());
          BasicBlock *EntryBB  = BasicBlock::Create(F->getContext(),
                                                    "entry", FuncWrapper);
          BasicBlock *DoCallBB = BasicBlock::Create(F->getContext(),
                                                    "usecache", FuncWrapper);
          BasicBlock *LookupBB = BasicBlock::Create(F->getContext(),
                                                    "lookupfp", FuncWrapper);

          // Check to see if we already looked up the value.
          Value *CachedVal = new LoadInst(Cache, "fpcache", EntryBB);
          Value *IsNull = new ICmpInst(*EntryBB, ICmpInst::ICMP_EQ, CachedVal,
                                       NullPtr, "isNull");
          BranchInst::Create(LookupBB, DoCallBB, IsNull, EntryBB);

          // Resolve the call to function F via the JIT API:
          //
          // call resolver(GetElementPtr...)
          CallInst *Resolver =
            CallInst::Create(resolverFunc, ResolverArgs.begin(),
                             ResolverArgs.end(), "resolver", LookupBB);

          // Cast the result from the resolver to correctly-typed function.
          CastInst *CastedResolver =
            new BitCastInst(Resolver,
                            PointerType::getUnqual(F->getFunctionType()),
                            "resolverCast", LookupBB);

          // Save the value in our cache.
          new StoreInst(CastedResolver, Cache, LookupBB);
          BranchInst::Create(DoCallBB, LookupBB);

          PHINode *FuncPtr = PHINode::Create(NullPtr->getType(),
                                             "fp", DoCallBB);
          FuncPtr->addIncoming(CastedResolver, LookupBB);
          FuncPtr->addIncoming(CachedVal, EntryBB);

          // Save the argument list.
          std::vector<Value*> Args;
          for (Function::arg_iterator i = FuncWrapper->arg_begin(),
                 e = FuncWrapper->arg_end(); i != e; ++i)
            Args.push_back(i);

          // Pass on the arguments to the real function, return its result
          if (F->getReturnType() == Type::getVoidTy(F->getContext())) {
            CallInst::Create(FuncPtr, Args.begin(), Args.end(), "", DoCallBB);
            ReturnInst::Create(F->getContext(), DoCallBB);
          } else {
            CallInst *Call = CallInst::Create(FuncPtr, Args.begin(), Args.end(),
                                              "retval", DoCallBB);
            ReturnInst::Create(F->getContext(),Call, DoCallBB);
          }

          // Use the wrapper function instead of the old function
          F->replaceAllUsesWith(FuncWrapper);
        }
      }
    }
  }

  if (verifyModule(*Test) || verifyModule(*Safe)) {
    errs() << "Bugpoint has a bug, which corrupted a module!!\n";
    abort();
  }
}
Пример #4
0
/// SplitLandingPadPredecessors - This method transforms the landing pad,
/// OrigBB, by introducing two new basic blocks into the function. One of those
/// new basic blocks gets the predecessors listed in Preds. The other basic
/// block gets the remaining predecessors of OrigBB. The landingpad instruction
/// OrigBB is clone into both of the new basic blocks. The new blocks are given
/// the suffixes 'Suffix1' and 'Suffix2', and are returned in the NewBBs vector.
///
/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. In particular,
/// it does not preserve LoopSimplify (because it's complicated to handle the
/// case where one of the edges being split is an exit of a loop with other
/// exits).
///
void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
                                       ArrayRef<BasicBlock*> Preds,
                                       const char *Suffix1, const char *Suffix2,
                                       Pass *P,
                                       SmallVectorImpl<BasicBlock*> &NewBBs) {
  assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!");

  // Create a new basic block for OrigBB's predecessors listed in Preds. Insert
  // it right before the original block.
  BasicBlock *NewBB1 = BasicBlock::Create(OrigBB->getContext(),
                                          OrigBB->getName() + Suffix1,
                                          OrigBB->getParent(), OrigBB);
  NewBBs.push_back(NewBB1);

  // The new block unconditionally branches to the old block.
  BranchInst *BI1 = BranchInst::Create(OrigBB, NewBB1);

  // Move the edges from Preds to point to NewBB1 instead of OrigBB.
  for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
    // This is slightly more strict than necessary; the minimum requirement
    // is that there be no more than one indirectbr branching to BB. And
    // all BlockAddress uses would need to be updated.
    assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
           "Cannot split an edge from an IndirectBrInst");
    Preds[i]->getTerminator()->replaceUsesOfWith(OrigBB, NewBB1);
  }

  // Update DominatorTree, LoopInfo, and LCCSA analysis information.
  bool HasLoopExit = false;
  UpdateAnalysisInformation(OrigBB, NewBB1, Preds, P, HasLoopExit);

  // Update the PHI nodes in OrigBB with the values coming from NewBB1.
  UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, P, HasLoopExit);

  // Move the remaining edges from OrigBB to point to NewBB2.
  SmallVector<BasicBlock*, 8> NewBB2Preds;
  for (pred_iterator i = pred_begin(OrigBB), e = pred_end(OrigBB);
       i != e; ) {
    BasicBlock *Pred = *i++;
    if (Pred == NewBB1) continue;
    assert(!isa<IndirectBrInst>(Pred->getTerminator()) &&
           "Cannot split an edge from an IndirectBrInst");
    NewBB2Preds.push_back(Pred);
    e = pred_end(OrigBB);
  }

  BasicBlock *NewBB2 = 0;
  if (!NewBB2Preds.empty()) {
    // Create another basic block for the rest of OrigBB's predecessors.
    NewBB2 = BasicBlock::Create(OrigBB->getContext(),
                                OrigBB->getName() + Suffix2,
                                OrigBB->getParent(), OrigBB);
    NewBBs.push_back(NewBB2);

    // The new block unconditionally branches to the old block.
    BranchInst *BI2 = BranchInst::Create(OrigBB, NewBB2);

    // Move the remaining edges from OrigBB to point to NewBB2.
    for (SmallVectorImpl<BasicBlock*>::iterator
           i = NewBB2Preds.begin(), e = NewBB2Preds.end(); i != e; ++i)
      (*i)->getTerminator()->replaceUsesOfWith(OrigBB, NewBB2);

    // Update DominatorTree, LoopInfo, and LCCSA analysis information.
    HasLoopExit = false;
    UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, P, HasLoopExit);

    // Update the PHI nodes in OrigBB with the values coming from NewBB2.
    UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, P, HasLoopExit);
  }

  LandingPadInst *LPad = OrigBB->getLandingPadInst();
  Instruction *Clone1 = LPad->clone();
  Clone1->setName(Twine("lpad") + Suffix1);
  NewBB1->getInstList().insert(NewBB1->getFirstInsertionPt(), Clone1);

  if (NewBB2) {
    Instruction *Clone2 = LPad->clone();
    Clone2->setName(Twine("lpad") + Suffix2);
    NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2);

    // Create a PHI node for the two cloned landingpad instructions.
    PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad);
    PN->addIncoming(Clone1, NewBB1);
    PN->addIncoming(Clone2, NewBB2);
    LPad->replaceAllUsesWith(PN);
    LPad->eraseFromParent();
  } else {
    // There is no second clone. Just replace the landing pad with the first
    // clone.
    LPad->replaceAllUsesWith(Clone1);
    LPad->eraseFromParent();
  }
}
Пример #5
0
Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
  // If there is no definition of the renamed variable in this block, just use
  // GetValueAtEndOfBlock to do our work.
  if (!HasValueForBlock(BB))
    return GetValueAtEndOfBlock(BB);

  // Otherwise, we have the hard case.  Get the live-in values for each
  // predecessor.
  SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues;
  Value *SingularValue = nullptr;

  // We can get our predecessor info by walking the pred_iterator list, but it
  // is relatively slow.  If we already have PHI nodes in this block, walk one
  // of them to get the predecessor list instead.
  if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
    for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) {
      BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
      Value *PredVal = GetValueAtEndOfBlock(PredBB);
      PredValues.push_back(std::make_pair(PredBB, PredVal));

      // Compute SingularValue.
      if (i == 0)
        SingularValue = PredVal;
      else if (PredVal != SingularValue)
        SingularValue = nullptr;
    }
  } else {
    bool isFirstPred = true;
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      BasicBlock *PredBB = *PI;
      Value *PredVal = GetValueAtEndOfBlock(PredBB);
      PredValues.push_back(std::make_pair(PredBB, PredVal));

      // Compute SingularValue.
      if (isFirstPred) {
        SingularValue = PredVal;
        isFirstPred = false;
      } else if (PredVal != SingularValue)
        SingularValue = nullptr;
    }
  }

  // If there are no predecessors, just return undef.
  if (PredValues.empty())
    return UndefValue::get(ProtoType);

  // Otherwise, if all the merged values are the same, just use it.
  if (SingularValue)
    return SingularValue;

  // Otherwise, we do need a PHI: check to see if we already have one available
  // in this block that produces the right value.
  if (isa<PHINode>(BB->begin())) {
    SmallDenseMap<BasicBlock*, Value*, 8> ValueMapping(PredValues.begin(),
                                                       PredValues.end());
    PHINode *SomePHI;
    for (BasicBlock::iterator It = BB->begin();
         (SomePHI = dyn_cast<PHINode>(It)); ++It) {
      if (IsEquivalentPHI(SomePHI, ValueMapping))
        return SomePHI;
    }
  }

  // Ok, we have no way out, insert a new one now.
  PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(),
                                         ProtoName, &BB->front());

  // Fill in all the predecessors of the PHI.
  for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
    InsertedPHI->addIncoming(PredValues[i].second, PredValues[i].first);

  // See if the PHI node can be merged to a single value.  This can happen in
  // loop cases when we get a PHI of itself and one other value.
  if (Value *V =
          SimplifyInstruction(InsertedPHI, BB->getModule()->getDataLayout())) {
    InsertedPHI->eraseFromParent();
    return V;
  }

  // Set the DebugLoc of the inserted PHI, if available.
  DebugLoc DL;
  if (const Instruction *I = BB->getFirstNonPHI())
      DL = I->getDebugLoc();
  InsertedPHI->setDebugLoc(DL);

  // If the client wants to know about all new instructions, tell it.
  if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);

  DEBUG(dbgs() << "  Inserted PHI: " << *InsertedPHI << "\n");
  return InsertedPHI;
}
Пример #6
0
/// Create a clone of the blocks in a loop and connect them together.
/// If UnrollProlog is true, loop structure will not be cloned, otherwise a new
/// loop will be created including all cloned blocks, and the iterator of it
/// switches to count NewIter down to 0.
///
static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog,
                            BasicBlock *InsertTop, BasicBlock *InsertBot,
                            std::vector<BasicBlock *> &NewBlocks,
                            LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
                            LoopInfo *LI) {
    BasicBlock *Preheader = L->getLoopPreheader();
    BasicBlock *Header = L->getHeader();
    BasicBlock *Latch = L->getLoopLatch();
    Function *F = Header->getParent();
    LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
    LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
    Loop *NewLoop = 0;
    Loop *ParentLoop = L->getParentLoop();
    if (!UnrollProlog) {
        NewLoop = new Loop();
        if (ParentLoop)
            ParentLoop->addChildLoop(NewLoop);
        else
            LI->addTopLevelLoop(NewLoop);
    }

    // For each block in the original loop, create a new copy,
    // and update the value map with the newly created values.
    for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
        BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".prol", F);
        NewBlocks.push_back(NewBB);

        if (NewLoop)
            NewLoop->addBasicBlockToLoop(NewBB, *LI);
        else if (ParentLoop)
            ParentLoop->addBasicBlockToLoop(NewBB, *LI);

        VMap[*BB] = NewBB;
        if (Header == *BB) {
            // For the first block, add a CFG connection to this newly
            // created block.
            InsertTop->getTerminator()->setSuccessor(0, NewBB);

        }
        if (Latch == *BB) {
            // For the last block, if UnrollProlog is true, create a direct jump to
            // InsertBot. If not, create a loop back to cloned head.
            VMap.erase((*BB)->getTerminator());
            BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
            BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
            IRBuilder<> Builder(LatchBR);
            if (UnrollProlog) {
                Builder.CreateBr(InsertBot);
            } else {
                PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, "prol.iter",
                                                  FirstLoopBB->getFirstNonPHI());
                Value *IdxSub =
                    Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
                                      NewIdx->getName() + ".sub");
                Value *IdxCmp =
                    Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
                Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
                NewIdx->addIncoming(NewIter, InsertTop);
                NewIdx->addIncoming(IdxSub, NewBB);
            }
            LatchBR->eraseFromParent();
        }
    }

    // Change the incoming values to the ones defined in the preheader or
    // cloned loop.
    for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
        PHINode *NewPHI = cast<PHINode>(VMap[I]);
        if (UnrollProlog) {
            VMap[I] = NewPHI->getIncomingValueForBlock(Preheader);
            cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
        } else {
            unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
            NewPHI->setIncomingBlock(idx, InsertTop);
            BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
            idx = NewPHI->getBasicBlockIndex(Latch);
            Value *InVal = NewPHI->getIncomingValue(idx);
            NewPHI->setIncomingBlock(idx, NewLatch);
            if (VMap[InVal])
                NewPHI->setIncomingValue(idx, VMap[InVal]);
        }
    }
    if (NewLoop) {
        // Add unroll disable metadata to disable future unrolling for this loop.
        SmallVector<Metadata *, 4> MDs;
        // Reserve first location for self reference to the LoopID metadata node.
        MDs.push_back(nullptr);
        MDNode *LoopID = NewLoop->getLoopID();
        if (LoopID) {
            // First remove any existing loop unrolling metadata.
            for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
                bool IsUnrollMetadata = false;
                MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
                if (MD) {
                    const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
                    IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
                }
                if (!IsUnrollMetadata)
                    MDs.push_back(LoopID->getOperand(i));
            }
        }

        LLVMContext &Context = NewLoop->getHeader()->getContext();
        SmallVector<Metadata *, 1> DisableOperands;
        DisableOperands.push_back(MDString::get(Context, "llvm.loop.unroll.disable"));
        MDNode *DisableNode = MDNode::get(Context, DisableOperands);
        MDs.push_back(DisableNode);

        MDNode *NewLoopID = MDNode::get(Context, MDs);
        // Set operand 0 to refer to the loop id itself.
        NewLoopID->replaceOperandWith(0, NewLoopID);
        NewLoop->setLoopID(NewLoopID);
    }
}
Пример #7
0
/// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and
/// an unconditional branch in it.
void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
  BasicBlock *DestBB = BI->getSuccessor(0);

  DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);

  // If the destination block has a single pred, then this is a trivial edge,
  // just collapse it.
  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
    if (SinglePred != DestBB) {
      // Remember if SinglePred was the entry block of the function.  If so, we
      // will need to move BB back to the entry position.
      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
      MergeBasicBlockIntoOnlyPred(DestBB, this);

      if (isEntry && BB != &BB->getParent()->getEntryBlock())
        BB->moveBefore(&BB->getParent()->getEntryBlock());
      
      DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
      return;
    }
  }

  // Otherwise, we have multiple predecessors of BB.  Update the PHIs in DestBB
  // to handle the new incoming edges it is about to have.
  PHINode *PN;
  for (BasicBlock::iterator BBI = DestBB->begin();
       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
    // Remove the incoming value for BB, and remember it.
    Value *InVal = PN->removeIncomingValue(BB, false);

    // Two options: either the InVal is a phi node defined in BB or it is some
    // value that dominates BB.
    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
    if (InValPhi && InValPhi->getParent() == BB) {
      // Add all of the input values of the input PHI as inputs of this phi.
      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
        PN->addIncoming(InValPhi->getIncomingValue(i),
                        InValPhi->getIncomingBlock(i));
    } else {
      // Otherwise, add one instance of the dominating value for each edge that
      // we will be adding.
      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
      } else {
        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
          PN->addIncoming(InVal, *PI);
      }
    }
  }

  // The PHIs are now updated, change everything that refers to BB to use
  // DestBB and remove BB.
  BB->replaceAllUsesWith(DestBB);
  if (PFI) {
    PFI->replaceAllUses(BB, DestBB);
    PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
  }
  BB->eraseFromParent();
  ++NumElim;

  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}
bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
  Module &M = *F.getParent();
  LLVMContext &C = F.getContext();
  IRBuilder<> IRB(C);
  SmallVector<Instruction *, 64> ToErase;
  // Vector of %setjmpTable values
  std::vector<Instruction *> SetjmpTableInsts;
  // Vector of %setjmpTableSize values
  std::vector<Instruction *> SetjmpTableSizeInsts;

  // Setjmp preparation

  // This instruction effectively means %setjmpTableSize = 4.
  // We create this as an instruction intentionally, and we don't want to fold
  // this instruction to a constant 4, because this value will be used in
  // SSAUpdater.AddAvailableValue(...) later.
  BasicBlock &EntryBB = F.getEntryBlock();
  BinaryOperator *SetjmpTableSize = BinaryOperator::Create(
      Instruction::Add, IRB.getInt32(4), IRB.getInt32(0), "setjmpTableSize",
      &*EntryBB.getFirstInsertionPt());
  // setjmpTable = (int *) malloc(40);
  Instruction *SetjmpTable = CallInst::CreateMalloc(
      SetjmpTableSize, IRB.getInt32Ty(), IRB.getInt32Ty(), IRB.getInt32(40),
      nullptr, nullptr, "setjmpTable");
  // setjmpTable[0] = 0;
  IRB.SetInsertPoint(SetjmpTableSize);
  IRB.CreateStore(IRB.getInt32(0), SetjmpTable);
  SetjmpTableInsts.push_back(SetjmpTable);
  SetjmpTableSizeInsts.push_back(SetjmpTableSize);

  // Setjmp transformation
  std::vector<PHINode *> SetjmpRetPHIs;
  Function *SetjmpF = M.getFunction("setjmp");
  for (User *U : SetjmpF->users()) {
    auto *CI = dyn_cast<CallInst>(U);
    if (!CI)
      report_fatal_error("Does not support indirect calls to setjmp");

    BasicBlock *BB = CI->getParent();
    if (BB->getParent() != &F) // in other function
      continue;

    // The tail is everything right after the call, and will be reached once
    // when setjmp is called, and later when longjmp returns to the setjmp
    BasicBlock *Tail = SplitBlock(BB, CI->getNextNode());
    // Add a phi to the tail, which will be the output of setjmp, which
    // indicates if this is the first call or a longjmp back. The phi directly
    // uses the right value based on where we arrive from
    IRB.SetInsertPoint(Tail->getFirstNonPHI());
    PHINode *SetjmpRet = IRB.CreatePHI(IRB.getInt32Ty(), 2, "setjmp.ret");

    // setjmp initial call returns 0
    SetjmpRet->addIncoming(IRB.getInt32(0), BB);
    // The proper output is now this, not the setjmp call itself
    CI->replaceAllUsesWith(SetjmpRet);
    // longjmp returns to the setjmp will add themselves to this phi
    SetjmpRetPHIs.push_back(SetjmpRet);

    // Fix call target
    // Our index in the function is our place in the array + 1 to avoid index
    // 0, because index 0 means the longjmp is not ours to handle.
    IRB.SetInsertPoint(CI);
    Value *Args[] = {CI->getArgOperand(0), IRB.getInt32(SetjmpRetPHIs.size()),
                     SetjmpTable, SetjmpTableSize};
    Instruction *NewSetjmpTable =
        IRB.CreateCall(SaveSetjmpF, Args, "setjmpTable");
    Instruction *NewSetjmpTableSize =
        IRB.CreateLoad(TempRet0GV, "setjmpTableSize");
    SetjmpTableInsts.push_back(NewSetjmpTable);
    SetjmpTableSizeInsts.push_back(NewSetjmpTableSize);
    ToErase.push_back(CI);
  }

  // Update each call that can longjmp so it can return to a setjmp where
  // relevant.

  // Because we are creating new BBs while processing and don't want to make
  // all these newly created BBs candidates again for longjmp processing, we
  // first make the vector of candidate BBs.
  std::vector<BasicBlock *> BBs;
  for (BasicBlock &BB : F)
    BBs.push_back(&BB);

  // BBs.size() will change within the loop, so we query it every time
  for (unsigned i = 0; i < BBs.size(); i++) {
    BasicBlock *BB = BBs[i];
    for (Instruction &I : *BB) {
      assert(!isa<InvokeInst>(&I));
      auto *CI = dyn_cast<CallInst>(&I);
      if (!CI)
        continue;

      const Value *Callee = CI->getCalledValue();
      if (!canLongjmp(M, Callee))
        continue;

      Value *Threw = nullptr;
      BasicBlock *Tail;
      if (Callee->getName().startswith(InvokePrefix)) {
        // If invoke wrapper has already been generated for this call in
        // previous EH phase, search for the load instruction
        // %__THREW__.val = __THREW__;
        // in postamble after the invoke wrapper call
        LoadInst *ThrewLI = nullptr;
        StoreInst *ThrewResetSI = nullptr;
        for (auto I = std::next(BasicBlock::iterator(CI)), IE = BB->end();
             I != IE; ++I) {
          if (auto *LI = dyn_cast<LoadInst>(I))
            if (auto *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand()))
              if (GV == ThrewGV) {
                Threw = ThrewLI = LI;
                break;
              }
        }
        // Search for the store instruction after the load above
        // __THREW__ = 0;
        for (auto I = std::next(BasicBlock::iterator(ThrewLI)), IE = BB->end();
             I != IE; ++I) {
          if (auto *SI = dyn_cast<StoreInst>(I))
            if (auto *GV = dyn_cast<GlobalVariable>(SI->getPointerOperand()))
              if (GV == ThrewGV && SI->getValueOperand() == IRB.getInt32(0)) {
                ThrewResetSI = SI;
                break;
              }
        }
        assert(Threw && ThrewLI && "Cannot find __THREW__ load after invoke");
        assert(ThrewResetSI && "Cannot find __THREW__ store after invoke");
        Tail = SplitBlock(BB, ThrewResetSI->getNextNode());

      } else {
        // Wrap call with invoke wrapper and generate preamble/postamble
        Threw = wrapInvoke(CI);
        ToErase.push_back(CI);
        Tail = SplitBlock(BB, CI->getNextNode());
      }

      // We need to replace the terminator in Tail - SplitBlock makes BB go
      // straight to Tail, we need to check if a longjmp occurred, and go to the
      // right setjmp-tail if so
      ToErase.push_back(BB->getTerminator());

      // Generate a function call to testSetjmp function and preamble/postamble
      // code to figure out (1) whether longjmp occurred (2) if longjmp
      // occurred, which setjmp it corresponds to
      Value *Label = nullptr;
      Value *LongjmpResult = nullptr;
      BasicBlock *EndBB = nullptr;
      wrapTestSetjmp(BB, CI, Threw, SetjmpTable, SetjmpTableSize, Label,
                     LongjmpResult, EndBB);
      assert(Label && LongjmpResult && EndBB);

      // Create switch instruction
      IRB.SetInsertPoint(EndBB);
      SwitchInst *SI = IRB.CreateSwitch(Label, Tail, SetjmpRetPHIs.size());
      // -1 means no longjmp happened, continue normally (will hit the default
      // switch case). 0 means a longjmp that is not ours to handle, needs a
      // rethrow. Otherwise the index is the same as the index in P+1 (to avoid
      // 0).
      for (unsigned i = 0; i < SetjmpRetPHIs.size(); i++) {
        SI->addCase(IRB.getInt32(i + 1), SetjmpRetPHIs[i]->getParent());
        SetjmpRetPHIs[i]->addIncoming(LongjmpResult, EndBB);
      }

      // We are splitting the block here, and must continue to find other calls
      // in the block - which is now split. so continue to traverse in the Tail
      BBs.push_back(Tail);
    }
  }

  // Erase everything we no longer need in this function
  for (Instruction *I : ToErase)
    I->eraseFromParent();

  // Free setjmpTable buffer before each return instruction
  for (BasicBlock &BB : F) {
    TerminatorInst *TI = BB.getTerminator();
    if (isa<ReturnInst>(TI))
      CallInst::CreateFree(SetjmpTable, TI);
  }

  // Every call to saveSetjmp can change setjmpTable and setjmpTableSize
  // (when buffer reallocation occurs)
  // entry:
  //   setjmpTableSize = 4;
  //   setjmpTable = (int *) malloc(40);
  //   setjmpTable[0] = 0;
  // ...
  // somebb:
  //   setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize);
  //   setjmpTableSize = __tempRet0;
  // So we need to make sure the SSA for these variables is valid so that every
  // saveSetjmp and testSetjmp calls have the correct arguments.
  SSAUpdater SetjmpTableSSA;
  SSAUpdater SetjmpTableSizeSSA;
  SetjmpTableSSA.Initialize(Type::getInt32PtrTy(C), "setjmpTable");
  SetjmpTableSizeSSA.Initialize(Type::getInt32Ty(C), "setjmpTableSize");
  for (Instruction *I : SetjmpTableInsts)
    SetjmpTableSSA.AddAvailableValue(I->getParent(), I);
  for (Instruction *I : SetjmpTableSizeInsts)
    SetjmpTableSizeSSA.AddAvailableValue(I->getParent(), I);

  for (auto UI = SetjmpTable->use_begin(), UE = SetjmpTable->use_end();
       UI != UE;) {
    // Grab the use before incrementing the iterator.
    Use &U = *UI;
    // Increment the iterator before removing the use from the list.
    ++UI;
    if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
      if (I->getParent() != &EntryBB)
        SetjmpTableSSA.RewriteUse(U);
  }
  for (auto UI = SetjmpTableSize->use_begin(), UE = SetjmpTableSize->use_end();
       UI != UE;) {
    Use &U = *UI;
    ++UI;
    if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
      if (I->getParent() != &EntryBB)
        SetjmpTableSizeSSA.RewriteUse(U);
  }

  // Finally, our modifications to the cfg can break dominance of SSA variables.
  // For example, in this code,
  // if (x()) { .. setjmp() .. }
  // if (y()) { .. longjmp() .. }
  // We must split the longjmp block, and it can jump into the block splitted
  // from setjmp one. But that means that when we split the setjmp block, it's
  // first part no longer dominates its second part - there is a theoretically
  // possible control flow path where x() is false, then y() is true and we
  // reach the second part of the setjmp block, without ever reaching the first
  // part. So, we rebuild SSA form here.
  rebuildSSA(F);
  return true;
}
Пример #9
0
/// InlineFunction - This function inlines the called function into the basic
/// block of the caller.  This returns false if it is not possible to inline
/// this call.  The program is still in a well defined state if this occurs
/// though.
///
/// Note that this only does one level of inlining.  For example, if the
/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
/// exists in the instruction stream.  Similarly this will inline a recursive
/// function by one level.
bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
                          bool InsertLifetime) {
  Instruction *TheCall = CS.getInstruction();
  assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
         "Instruction not in function!");

  // If IFI has any state in it, zap it before we fill it in.
  IFI.reset();
  
  const Function *CalledFunc = CS.getCalledFunction();
  if (CalledFunc == 0 ||          // Can't inline external function or indirect
      CalledFunc->isDeclaration() || // call, or call to a vararg function!
      CalledFunc->getFunctionType()->isVarArg()) return false;

  // If the call to the callee is not a tail call, we must clear the 'tail'
  // flags on any calls that we inline.
  bool MustClearTailCallFlags =
    !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());

  // If the call to the callee cannot throw, set the 'nounwind' flag on any
  // calls that we inline.
  bool MarkNoUnwind = CS.doesNotThrow();

  BasicBlock *OrigBB = TheCall->getParent();
  Function *Caller = OrigBB->getParent();

  // GC poses two hazards to inlining, which only occur when the callee has GC:
  //  1. If the caller has no GC, then the callee's GC must be propagated to the
  //     caller.
  //  2. If the caller has a differing GC, it is invalid to inline.
  if (CalledFunc->hasGC()) {
    if (!Caller->hasGC())
      Caller->setGC(CalledFunc->getGC());
    else if (CalledFunc->getGC() != Caller->getGC())
      return false;
  }

  // Get the personality function from the callee if it contains a landing pad.
  Value *CalleePersonality = 0;
  for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end();
       I != E; ++I)
    if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
      const BasicBlock *BB = II->getUnwindDest();
      const LandingPadInst *LP = BB->getLandingPadInst();
      CalleePersonality = LP->getPersonalityFn();
      break;
    }

  // Find the personality function used by the landing pads of the caller. If it
  // exists, then check to see that it matches the personality function used in
  // the callee.
  if (CalleePersonality) {
    for (Function::const_iterator I = Caller->begin(), E = Caller->end();
         I != E; ++I)
      if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
        const BasicBlock *BB = II->getUnwindDest();
        const LandingPadInst *LP = BB->getLandingPadInst();

        // If the personality functions match, then we can perform the
        // inlining. Otherwise, we can't inline.
        // TODO: This isn't 100% true. Some personality functions are proper
        //       supersets of others and can be used in place of the other.
        if (LP->getPersonalityFn() != CalleePersonality)
          return false;

        break;
      }
  }

  // Get an iterator to the last basic block in the function, which will have
  // the new function inlined after it.
  Function::iterator LastBlock = &Caller->back();

  // Make sure to capture all of the return instructions from the cloned
  // function.
  SmallVector<ReturnInst*, 8> Returns;
  ClonedCodeInfo InlinedFunctionInfo;
  Function::iterator FirstNewBlock;

  { // Scope to destroy VMap after cloning.
    ValueToValueMapTy VMap;

    assert(CalledFunc->arg_size() == CS.arg_size() &&
           "No varargs calls can be inlined!");

    // Calculate the vector of arguments to pass into the function cloner, which
    // matches up the formal to the actual argument values.
    CallSite::arg_iterator AI = CS.arg_begin();
    unsigned ArgNo = 0;
    for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
         E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
      Value *ActualArg = *AI;

      // When byval arguments actually inlined, we need to make the copy implied
      // by them explicit.  However, we don't do this if the callee is readonly
      // or readnone, because the copy would be unneeded: the callee doesn't
      // modify the struct.
      if (CS.isByValArgument(ArgNo)) {
        ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
                                        CalledFunc->getParamAlignment(ArgNo+1));
 
        // Calls that we inline may use the new alloca, so we need to clear
        // their 'tail' flags if HandleByValArgument introduced a new alloca and
        // the callee has calls.
        MustClearTailCallFlags |= ActualArg != *AI;
      }

      VMap[I] = ActualArg;
    }

    // We want the inliner to prune the code as it copies.  We would LOVE to
    // have no dead or constant instructions leftover after inlining occurs
    // (which can happen, e.g., because an argument was constant), but we'll be
    // happy with whatever the cloner can do.
    CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, 
                              /*ModuleLevelChanges=*/false, Returns, ".i",
                              &InlinedFunctionInfo, IFI.TD, TheCall);

    // Remember the first block that is newly cloned over.
    FirstNewBlock = LastBlock; ++FirstNewBlock;

    // Update the callgraph if requested.
    if (IFI.CG)
      UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);

    // Update inlined instructions' line number information.
    fixupLineNumbers(Caller, FirstNewBlock, TheCall);
  }

  // If there are any alloca instructions in the block that used to be the entry
  // block for the callee, move them to the entry block of the caller.  First
  // calculate which instruction they should be inserted before.  We insert the
  // instructions at the end of the current alloca list.
  {
    BasicBlock::iterator InsertPoint = Caller->begin()->begin();
    for (BasicBlock::iterator I = FirstNewBlock->begin(),
         E = FirstNewBlock->end(); I != E; ) {
      AllocaInst *AI = dyn_cast<AllocaInst>(I++);
      if (AI == 0) continue;
      
      // If the alloca is now dead, remove it.  This often occurs due to code
      // specialization.
      if (AI->use_empty()) {
        AI->eraseFromParent();
        continue;
      }

      if (!isa<Constant>(AI->getArraySize()))
        continue;
      
      // Keep track of the static allocas that we inline into the caller.
      IFI.StaticAllocas.push_back(AI);
      
      // Scan for the block of allocas that we can move over, and move them
      // all at once.
      while (isa<AllocaInst>(I) &&
             isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
        IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
        ++I;
      }

      // Transfer all of the allocas over in a block.  Using splice means
      // that the instructions aren't removed from the symbol table, then
      // reinserted.
      Caller->getEntryBlock().getInstList().splice(InsertPoint,
                                                   FirstNewBlock->getInstList(),
                                                   AI, I);
    }
  }

  // Leave lifetime markers for the static alloca's, scoping them to the
  // function we just inlined.
  if (InsertLifetime && !IFI.StaticAllocas.empty()) {
    IRBuilder<> builder(FirstNewBlock->begin());
    for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
      AllocaInst *AI = IFI.StaticAllocas[ai];

      // If the alloca is already scoped to something smaller than the whole
      // function then there's no need to add redundant, less accurate markers.
      if (hasLifetimeMarkers(AI))
        continue;

      builder.CreateLifetimeStart(AI);
      for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
        IRBuilder<> builder(Returns[ri]);
        builder.CreateLifetimeEnd(AI);
      }
    }
  }

  // If the inlined code contained dynamic alloca instructions, wrap the inlined
  // code with llvm.stacksave/llvm.stackrestore intrinsics.
  if (InlinedFunctionInfo.ContainsDynamicAllocas) {
    Module *M = Caller->getParent();
    // Get the two intrinsics we care about.
    Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
    Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);

    // Insert the llvm.stacksave.
    CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
      .CreateCall(StackSave, "savedstack");

    // Insert a call to llvm.stackrestore before any return instructions in the
    // inlined function.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
    }
  }

  // If we are inlining tail call instruction through a call site that isn't
  // marked 'tail', we must remove the tail marker for any calls in the inlined
  // code.  Also, calls inlined through a 'nounwind' call site should be marked
  // 'nounwind'.
  if (InlinedFunctionInfo.ContainsCalls &&
      (MustClearTailCallFlags || MarkNoUnwind)) {
    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
         BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
        if (CallInst *CI = dyn_cast<CallInst>(I)) {
          if (MustClearTailCallFlags)
            CI->setTailCall(false);
          if (MarkNoUnwind)
            CI->setDoesNotThrow();
        }
  }

  // If we are inlining for an invoke instruction, we must make sure to rewrite
  // any call instructions into invoke instructions.
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);

  // If we cloned in _exactly one_ basic block, and if that block ends in a
  // return instruction, we splice the body of the inlined callee directly into
  // the calling basic block.
  if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
    // Move all of the instructions right before the call.
    OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
                                 FirstNewBlock->begin(), FirstNewBlock->end());
    // Remove the cloned basic block.
    Caller->getBasicBlockList().pop_back();

    // If the call site was an invoke instruction, add a branch to the normal
    // destination.
    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
      BranchInst::Create(II->getNormalDest(), TheCall);

    // If the return instruction returned a value, replace uses of the call with
    // uses of the returned value.
    if (!TheCall->use_empty()) {
      ReturnInst *R = Returns[0];
      if (TheCall == R->getReturnValue())
        TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
      else
        TheCall->replaceAllUsesWith(R->getReturnValue());
    }
    // Since we are now done with the Call/Invoke, we can delete it.
    TheCall->eraseFromParent();

    // Since we are now done with the return instruction, delete it also.
    Returns[0]->eraseFromParent();

    // We are now done with the inlining.
    return true;
  }

  // Otherwise, we have the normal case, of more than one block to inline or
  // multiple return sites.

  // We want to clone the entire callee function into the hole between the
  // "starter" and "ender" blocks.  How we accomplish this depends on whether
  // this is an invoke instruction or a call instruction.
  BasicBlock *AfterCallBB;
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {

    // Add an unconditional branch to make this look like the CallInst case...
    BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);

    // Split the basic block.  This guarantees that no PHI nodes will have to be
    // updated due to new incoming edges, and make the invoke case more
    // symmetric to the call case.
    AfterCallBB = OrigBB->splitBasicBlock(NewBr,
                                          CalledFunc->getName()+".exit");

  } else {  // It's a call
    // If this is a call instruction, we need to split the basic block that
    // the call lives in.
    //
    AfterCallBB = OrigBB->splitBasicBlock(TheCall,
                                          CalledFunc->getName()+".exit");
  }

  // Change the branch that used to go to AfterCallBB to branch to the first
  // basic block of the inlined function.
  //
  TerminatorInst *Br = OrigBB->getTerminator();
  assert(Br && Br->getOpcode() == Instruction::Br &&
         "splitBasicBlock broken!");
  Br->setOperand(0, FirstNewBlock);


  // Now that the function is correct, make it a little bit nicer.  In
  // particular, move the basic blocks inserted from the end of the function
  // into the space made by splitting the source basic block.
  Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
                                     FirstNewBlock, Caller->end());

  // Handle all of the return instructions that we just cloned in, and eliminate
  // any users of the original call/invoke instruction.
  Type *RTy = CalledFunc->getReturnType();

  PHINode *PHI = 0;
  if (Returns.size() > 1) {
    // The PHI node should go at the front of the new basic block to merge all
    // possible incoming values.
    if (!TheCall->use_empty()) {
      PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
                            AfterCallBB->begin());
      // Anything that used the result of the function call should now use the
      // PHI node as their operand.
      TheCall->replaceAllUsesWith(PHI);
    }

    // Loop over all of the return instructions adding entries to the PHI node
    // as appropriate.
    if (PHI) {
      for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
        ReturnInst *RI = Returns[i];
        assert(RI->getReturnValue()->getType() == PHI->getType() &&
               "Ret value not consistent in function!");
        PHI->addIncoming(RI->getReturnValue(), RI->getParent());
      }
    }


    // Add a branch to the merge points and remove return instructions.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      ReturnInst *RI = Returns[i];
      BranchInst::Create(AfterCallBB, RI);
      RI->eraseFromParent();
    }
  } else if (!Returns.empty()) {
    // Otherwise, if there is exactly one return value, just replace anything
    // using the return value of the call with the computed value.
    if (!TheCall->use_empty()) {
      if (TheCall == Returns[0]->getReturnValue())
        TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
      else
        TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
    }

    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
    BasicBlock *ReturnBB = Returns[0]->getParent();
    ReturnBB->replaceAllUsesWith(AfterCallBB);

    // Splice the code from the return block into the block that it will return
    // to, which contains the code that was after the call.
    AfterCallBB->getInstList().splice(AfterCallBB->begin(),
                                      ReturnBB->getInstList());

    // Delete the return instruction now and empty ReturnBB now.
    Returns[0]->eraseFromParent();
    ReturnBB->eraseFromParent();
  } else if (!TheCall->use_empty()) {
    // No returns, but something is using the return value of the call.  Just
    // nuke the result.
    TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
  }

  // Since we are now done with the Call/Invoke, we can delete it.
  TheCall->eraseFromParent();

  // We should always be able to fold the entry block of the function into the
  // single predecessor of the block...
  assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
  BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);

  // Splice the code entry block into calling block, right before the
  // unconditional branch.
  CalleeEntry->replaceAllUsesWith(OrigBB);  // Update PHI nodes
  OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());

  // Remove the unconditional branch.
  OrigBB->getInstList().erase(Br);

  // Now we can remove the CalleeEntry block, which is now empty.
  Caller->getBasicBlockList().erase(CalleeEntry);

  // If we inserted a phi node, check to see if it has a single value (e.g. all
  // the entries are the same or undef).  If so, remove the PHI so it doesn't
  // block other optimizations.
  if (PHI) {
    if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
      PHI->replaceAllUsesWith(V);
      PHI->eraseFromParent();
    }
  }

  return true;
}
Пример #10
0
/// UnswitchNontrivialCondition - We determined that the loop is profitable
/// to unswitch when LIC equal Val.  Split it into loop versions and test the
/// condition outside of either loop.  Return the loops created as Out1/Out2.
void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
                                               Loop *L) {
  Function *F = loopHeader->getParent();
  DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
        << loopHeader->getName() << " [" << L->getBlocks().size()
        << " blocks] in Function " << F->getName()
        << " when '" << *Val << "' == " << *LIC << "\n");

  if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
    SE->forgetLoop(L);

  LoopBlocks.clear();
  NewBlocks.clear();

  // First step, split the preheader and exit blocks, and add these blocks to
  // the LoopBlocks list.
  BasicBlock *NewPreheader = SplitEdge(loopPreheader, loopHeader, this);
  LoopBlocks.push_back(NewPreheader);

  // We want the loop to come after the preheader, but before the exit blocks.
  LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end());

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  // Split all of the edges from inside the loop to their exit blocks.  Update
  // the appropriate Phi nodes as we do so.
  SplitExitEdges(L, ExitBlocks);

  // The exit blocks may have been changed due to edge splitting, recompute.
  ExitBlocks.clear();
  L->getUniqueExitBlocks(ExitBlocks);

  // Add exit blocks to the loop blocks.
  LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end());

  // Next step, clone all of the basic blocks that make up the loop (including
  // the loop preheader and exit blocks), keeping track of the mapping between
  // the instructions and blocks.
  NewBlocks.reserve(LoopBlocks.size());
  ValueToValueMapTy VMap;
  for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
    BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);

    NewBlocks.push_back(NewBB);
    VMap[LoopBlocks[i]] = NewBB;  // Keep the BB mapping.
    LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
  }

  // Splice the newly inserted blocks into the function right before the
  // original preheader.
  F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(),
                                NewBlocks[0], F->end());

  // Now we create the new Loop object for the versioned loop.
  Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM);

  // Recalculate unswitching quota, inherit simplified switches info for NewBB,
  // Probably clone more loop-unswitch related loop properties.
  BranchesInfo.cloneData(NewLoop, L, VMap);

  Loop *ParentLoop = L->getParentLoop();
  if (ParentLoop) {
    // Make sure to add the cloned preheader and exit blocks to the parent loop
    // as well.
    ParentLoop->addBasicBlockToLoop(NewBlocks[0], LI->getBase());
  }

  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
    BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
    // The new exit block should be in the same loop as the old one.
    if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
      ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());

    assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
           "Exit block should have been split to have one successor!");
    BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);

    // If the successor of the exit block had PHI nodes, add an entry for
    // NewExit.
    PHINode *PN;
    for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
      PN = cast<PHINode>(I);
      Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
      ValueToValueMapTy::iterator It = VMap.find(V);
      if (It != VMap.end()) V = It->second;
      PN->addIncoming(V, NewExit);
    }

    if (LandingPadInst *LPad = NewExit->getLandingPadInst()) {
      PN = PHINode::Create(LPad->getType(), 0, "",
                           ExitSucc->getFirstInsertionPt());

      for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc);
           I != E; ++I) {
        BasicBlock *BB = *I;
        LandingPadInst *LPI = BB->getLandingPadInst();
        LPI->replaceAllUsesWith(PN);
        PN->addIncoming(LPI, BB);
      }
    }
  }

  // Rewrite the code to refer to itself.
  for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
    for (BasicBlock::iterator I = NewBlocks[i]->begin(),
           E = NewBlocks[i]->end(); I != E; ++I)
      RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);

  // Rewrite the original preheader to select between versions of the loop.
  BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
  assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
         "Preheader splitting did not work correctly!");

  // Emit the new branch that selects between the two versions of this loop.
  EmitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR);
  LPM->deleteSimpleAnalysisValue(OldBR, L);
  OldBR->eraseFromParent();

  LoopProcessWorklist.push_back(NewLoop);
  redoLoop = true;

  // Keep a WeakVH holding onto LIC.  If the first call to RewriteLoopBody
  // deletes the instruction (for example by simplifying a PHI that feeds into
  // the condition that we're unswitching on), we don't rewrite the second
  // iteration.
  WeakVH LICHandle(LIC);

  // Now we rewrite the original code to know that the condition is true and the
  // new code to know that the condition is false.
  RewriteLoopBodyWithConditionConstant(L, LIC, Val, false);

  // It's possible that simplifying one loop could cause the other to be
  // changed to another value or a constant.  If its a constant, don't simplify
  // it.
  if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop &&
      LICHandle && !isa<Constant>(LICHandle))
    RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true);
}
Пример #11
0
bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
                                       BasicBlock *&OldEntry,
                                       bool &TailCallsAreMarkedTail,
                                       SmallVectorImpl<PHINode *> &ArgumentPHIs,
                                       bool CannotTailCallElimCallsMarkedTail) {
  // If we are introducing accumulator recursion to eliminate operations after
  // the call instruction that are both associative and commutative, the initial
  // value for the accumulator is placed in this variable.  If this value is set
  // then we actually perform accumulator recursion elimination instead of
  // simple tail recursion elimination.  If the operation is an LLVM instruction
  // (eg: "add") then it is recorded in AccumulatorRecursionInstr.  If not, then
  // we are handling the case when the return instruction returns a constant C
  // which is different to the constant returned by other return instructions
  // (which is recorded in AccumulatorRecursionEliminationInitVal).  This is a
  // special case of accumulator recursion, the operation being "return C".
  Value *AccumulatorRecursionEliminationInitVal = 0;
  Instruction *AccumulatorRecursionInstr = 0;

  // Ok, we found a potential tail call.  We can currently only transform the
  // tail call if all of the instructions between the call and the return are
  // movable to above the call itself, leaving the call next to the return.
  // Check that this is the case now.
  BasicBlock::iterator BBI = CI;
  for (++BBI; &*BBI != Ret; ++BBI) {
    if (CanMoveAboveCall(BBI, CI)) continue;

    // If we can't move the instruction above the call, it might be because it
    // is an associative and commutative operation that could be transformed
    // using accumulator recursion elimination.  Check to see if this is the
    // case, and if so, remember the initial accumulator value for later.
    if ((AccumulatorRecursionEliminationInitVal =
                           CanTransformAccumulatorRecursion(BBI, CI))) {
      // Yes, this is accumulator recursion.  Remember which instruction
      // accumulates.
      AccumulatorRecursionInstr = BBI;
    } else {
      return false;   // Otherwise, we cannot eliminate the tail recursion!
    }
  }

  // We can only transform call/return pairs that either ignore the return value
  // of the call and return void, ignore the value of the call and return a
  // constant, return the value returned by the tail call, or that are being
  // accumulator recursion variable eliminated.
  if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
      !isa<UndefValue>(Ret->getReturnValue()) &&
      AccumulatorRecursionEliminationInitVal == 0 &&
      !getCommonReturnValue(0, CI)) {
    // One case remains that we are able to handle: the current return
    // instruction returns a constant, and all other return instructions
    // return a different constant.
    if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret))
      return false; // Current return instruction does not return a constant.
    // Check that all other return instructions return a common constant.  If
    // so, record it in AccumulatorRecursionEliminationInitVal.
    AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI);
    if (!AccumulatorRecursionEliminationInitVal)
      return false;
  }

  BasicBlock *BB = Ret->getParent();
  Function *F = BB->getParent();

  // OK! We can transform this tail call.  If this is the first one found,
  // create the new entry block, allowing us to branch back to the old entry.
  if (OldEntry == 0) {
    OldEntry = &F->getEntryBlock();
    BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry);
    NewEntry->takeName(OldEntry);
    OldEntry->setName("tailrecurse");
    BranchInst::Create(OldEntry, NewEntry);

    // If this tail call is marked 'tail' and if there are any allocas in the
    // entry block, move them up to the new entry block.
    TailCallsAreMarkedTail = CI->isTailCall();
    if (TailCallsAreMarkedTail)
      // Move all fixed sized allocas from OldEntry to NewEntry.
      for (BasicBlock::iterator OEBI = OldEntry->begin(), E = OldEntry->end(),
             NEBI = NewEntry->begin(); OEBI != E; )
        if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
          if (isa<ConstantInt>(AI->getArraySize()))
            AI->moveBefore(NEBI);

    // Now that we have created a new block, which jumps to the entry
    // block, insert a PHI node for each argument of the function.
    // For now, we initialize each PHI to only have the real arguments
    // which are passed in.
    Instruction *InsertPos = OldEntry->begin();
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
         I != E; ++I) {
      PHINode *PN = PHINode::Create(I->getType(), 2,
                                    I->getName() + ".tr", InsertPos);
      I->replaceAllUsesWith(PN); // Everyone use the PHI node now!
      PN->addIncoming(I, NewEntry);
      ArgumentPHIs.push_back(PN);
    }
  }

  // If this function has self recursive calls in the tail position where some
  // are marked tail and some are not, only transform one flavor or another.  We
  // have to choose whether we move allocas in the entry block to the new entry
  // block or not, so we can't make a good choice for both.  NOTE: We could do
  // slightly better here in the case that the function has no entry block
  // allocas.
  if (TailCallsAreMarkedTail && !CI->isTailCall())
    return false;

  // Ok, now that we know we have a pseudo-entry block WITH all of the
  // required PHI nodes, add entries into the PHI node for the actual
  // parameters passed into the tail-recursive call.
  for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
    ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB);

  // If we are introducing an accumulator variable to eliminate the recursion,
  // do so now.  Note that we _know_ that no subsequent tail recursion
  // eliminations will happen on this function because of the way the
  // accumulator recursion predicate is set up.
  //
  if (AccumulatorRecursionEliminationInitVal) {
    Instruction *AccRecInstr = AccumulatorRecursionInstr;
    // Start by inserting a new PHI node for the accumulator.
    pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry);
    PHINode *AccPN =
      PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(),
                      std::distance(PB, PE) + 1,
                      "accumulator.tr", OldEntry->begin());

    // Loop over all of the predecessors of the tail recursion block.  For the
    // real entry into the function we seed the PHI with the initial value,
    // computed earlier.  For any other existing branches to this block (due to
    // other tail recursions eliminated) the accumulator is not modified.
    // Because we haven't added the branch in the current block to OldEntry yet,
    // it will not show up as a predecessor.
    for (pred_iterator PI = PB; PI != PE; ++PI) {
      BasicBlock *P = *PI;
      if (P == &F->getEntryBlock())
        AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P);
      else
        AccPN->addIncoming(AccPN, P);
    }

    if (AccRecInstr) {
      // Add an incoming argument for the current block, which is computed by
      // our associative and commutative accumulator instruction.
      AccPN->addIncoming(AccRecInstr, BB);

      // Next, rewrite the accumulator recursion instruction so that it does not
      // use the result of the call anymore, instead, use the PHI node we just
      // inserted.
      AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN);
    } else {
      // Add an incoming argument for the current block, which is just the
      // constant returned by the current return instruction.
      AccPN->addIncoming(Ret->getReturnValue(), BB);
    }

    // Finally, rewrite any return instructions in the program to return the PHI
    // node instead of the "initval" that they do currently.  This loop will
    // actually rewrite the return value we are destroying, but that's ok.
    for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
      if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
        RI->setOperand(0, AccPN);
    ++NumAccumAdded;
  }

  // Now that all of the PHI nodes are in place, remove the call and
  // ret instructions, replacing them with an unconditional branch.
  BranchInst *NewBI = BranchInst::Create(OldEntry, Ret);
  NewBI->setDebugLoc(CI->getDebugLoc());

  BB->getInstList().erase(Ret);  // Remove return.
  BB->getInstList().erase(CI);   // Remove call.
  ++NumEliminated;
  return true;
}
Пример #12
0
/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2 }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();

  // Check to see if the successor block has exactly two incoming edges.  If
  // so, see if the other predecessor contains a store to the same location.
  // if so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);

  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *P = *PI;
  BasicBlock *OtherBB = nullptr;

  if (P != StoreBB)
    OtherBB = P;

  if (++PI == pred_end(DestBB))
    return false;

  P = *PI;
  if (P != StoreBB) {
    if (OtherBB)
      return false;
    OtherBB = P;
  }
  if (++PI != pred_end(DestBB))
    return false;

  // Bail out if all the relevant blocks aren't distinct (this can happen,
  // for example, if SI is in an infinite loop)
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI(OtherBB->getTerminator());
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case.  there is an instruction before the branch.
  StoreInst *OtherStore = nullptr;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info.
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI==OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or is not the
    // right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of the
    // destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle.  See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBr, we have to
    // make sure nothing reads or overwrites the stored value in
    // StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }

  // Advance to a place where it is safe to insert the new store and
  // insert it.
  BBI = DestBB->getFirstInsertionPt();
  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
                                   SI.isVolatile(),
                                   SI.getAlignment(),
                                   SI.getOrdering(),
                                   SI.getSynchScope());
  InsertNewInstBefore(NewSI, *BBI);
  NewSI->setDebugLoc(OtherStore->getDebugLoc());

  // If the two stores had AA tags, merge them.
  AAMDNodes AATags;
  SI.getAAMetadata(AATags);
  if (AATags) {
    OtherStore->getAAMetadata(AATags, /* Merge = */ true);
    NewSI->setAAMetadata(AATags);
  }

  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  return true;
}
Пример #13
0
// If we have a PHI node with a vector type that has only 2 uses: feed
// itself and be an operand of extractelemnt at a constant location,
// try to replace the PHI of the vector type with a PHI of a scalar type
Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
  // Verify that the PHI node has exactly 2 uses. Otherwise return NULL.
  if (!PN->hasNUses(2))
    return NULL;

  // If so, it's known at this point that one operand is PHI and the other is
  // an extractelement node. Find the PHI user that is not the extractelement
  // node.
  Value::use_iterator iu = PN->use_begin();
  Instruction *PHIUser = dyn_cast<Instruction>(*iu);
  if (PHIUser == cast<Instruction>(&EI))
    PHIUser = cast<Instruction>(*(++iu));

  // Verify that this PHI user has one use, which is the PHI itself,
  // and that it is a binary operation which is cheap to scalarize.
  // otherwise return NULL.
  if (!PHIUser->hasOneUse() || !(PHIUser->use_back() == PN) ||
    !(isa<BinaryOperator>(PHIUser)) ||
    !CheapToScalarize(PHIUser, true))
    return NULL;

  // Create a scalar PHI node that will replace the vector PHI node
  // just before the current PHI node.
  PHINode * scalarPHI = cast<PHINode>(
    InsertNewInstWith(PHINode::Create(EI.getType(),
    PN->getNumIncomingValues(), ""), *PN));
  // Scalarize each PHI operand.
  for (unsigned i=0; i < PN->getNumIncomingValues(); i++) {
    Value *PHIInVal = PN->getIncomingValue(i);
    BasicBlock *inBB = PN->getIncomingBlock(i);
    Value *Elt = EI.getIndexOperand();
    // If the operand is the PHI induction variable:
    if (PHIInVal == PHIUser) {
      // Scalarize the binary operation. Its first operand is the
      // scalar PHI and the second operand is extracted from the other
      // vector operand.
      BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
      unsigned opId = (B0->getOperand(0) == PN) ? 1: 0;
      Value *Op = Builder->CreateExtractElement(
        B0->getOperand(opId), Elt, B0->getOperand(opId)->getName()+".Elt");
      Value *newPHIUser = InsertNewInstWith(
        BinaryOperator::Create(B0->getOpcode(), scalarPHI,Op),
        *B0);
      scalarPHI->addIncoming(newPHIUser, inBB);
    } else {
      // Scalarize PHI input:
      Instruction *newEI =
        ExtractElementInst::Create(PHIInVal, Elt, "");
      // Insert the new instruction into the predecessor basic block.
      Instruction *pos = dyn_cast<Instruction>(PHIInVal);
      BasicBlock::iterator InsertPos;
      if (pos && !isa<PHINode>(pos)) {
        InsertPos = pos;
        ++InsertPos;
      } else {
        InsertPos = inBB->getFirstInsertionPt();
      }

      InsertNewInstWith(newEI, *InsertPos);

      scalarPHI->addIncoming(newEI, inBB);
    }
  }
  return ReplaceInstUsesWith(EI, scalarPHI);
}
Пример #14
0
/// InsertUniqueBackedgeBlock - This method is called when the specified loop
/// has more than one backedge in it.  If this occurs, revector all of these
/// backedges to target a new basic block and have that block branch to the loop
/// header.  This ensures that loops have exactly one backedge.
///
BasicBlock *
LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
  assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");

  // Get information about the loop
  BasicBlock *Header = L->getHeader();
  Function *F = Header->getParent();

  // Unique backedge insertion currently depends on having a preheader.
  if (!Preheader)
    return 0;

  // Figure out which basic blocks contain back-edges to the loop header.
  std::vector<BasicBlock*> BackedgeBlocks;
  for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
    BasicBlock *P = *I;

    // Indirectbr edges cannot be split, so we must fail if we find one.
    if (isa<IndirectBrInst>(P->getTerminator()))
      return 0;

    if (P != Preheader) BackedgeBlocks.push_back(P);
  }

  // Create and insert the new backedge block...
  BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
                                           Header->getName()+".backedge", F);
  BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);

  DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
               << BEBlock->getName() << "\n");

  // Move the new backedge block to right after the last backedge block.
  Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos;
  F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock);

  // Now that the block has been inserted into the function, create PHI nodes in
  // the backedge block which correspond to any PHI nodes in the header block.
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    PHINode *PN = cast<PHINode>(I);
    PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(),
                                     PN->getName()+".be", BETerminator);
    if (AA) AA->copyValue(PN, NewPN);

    // Loop over the PHI node, moving all entries except the one for the
    // preheader over to the new PHI node.
    unsigned PreheaderIdx = ~0U;
    bool HasUniqueIncomingValue = true;
    Value *UniqueValue = 0;
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
      BasicBlock *IBB = PN->getIncomingBlock(i);
      Value *IV = PN->getIncomingValue(i);
      if (IBB == Preheader) {
        PreheaderIdx = i;
      } else {
        NewPN->addIncoming(IV, IBB);
        if (HasUniqueIncomingValue) {
          if (UniqueValue == 0)
            UniqueValue = IV;
          else if (UniqueValue != IV)
            HasUniqueIncomingValue = false;
        }
      }
    }

    // Delete all of the incoming values from the old PN except the preheader's
    assert(PreheaderIdx != ~0U && "PHI has no preheader entry??");
    if (PreheaderIdx != 0) {
      PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx));
      PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx));
    }
    // Nuke all entries except the zero'th.
    for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i)
      PN->removeIncomingValue(e-i, false);

    // Finally, add the newly constructed PHI node as the entry for the BEBlock.
    PN->addIncoming(NewPN, BEBlock);

    // As an optimization, if all incoming values in the new PhiNode (which is a
    // subset of the incoming values of the old PHI node) have the same value,
    // eliminate the PHI Node.
    if (HasUniqueIncomingValue) {
      NewPN->replaceAllUsesWith(UniqueValue);
      if (AA) AA->deleteValue(NewPN);
      BEBlock->getInstList().erase(NewPN);
    }
  }

  // Now that all of the PHI nodes have been inserted and adjusted, modify the
  // backedge blocks to just to the BEBlock instead of the header.
  for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
    TerminatorInst *TI = BackedgeBlocks[i]->getTerminator();
    for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op)
      if (TI->getSuccessor(Op) == Header)
        TI->setSuccessor(Op, BEBlock);
  }

  //===--- Update all analyses which we must preserve now -----------------===//

  // Update Loop Information - we know that this block is now in the current
  // loop and all parent loops.
  L->addBasicBlockToLoop(BEBlock, LI->getBase());

  // Update dominator information
  DT->splitBlock(BEBlock);

  return BEBlock;
}
Пример #15
0
/// Connect the unrolling prolog code to the original loop.
/// The unrolling prolog code contains code to execute the
/// 'extra' iterations if the run-time trip count modulo the
/// unroll count is non-zero.
///
/// This function performs the following:
/// - Create PHI nodes at prolog end block to combine values
///   that exit the prolog code and jump around the prolog.
/// - Add a PHI operand to a PHI node at the loop exit block
///   for values that exit the prolog and go around the loop.
/// - Branch around the original loop if the trip count is less
///   than the unroll factor.
///
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
                          BasicBlock *PrologExit,
                          BasicBlock *OriginalLoopLatchExit,
                          BasicBlock *PreHeader, BasicBlock *NewPreHeader,
                          ValueToValueMapTy &VMap, DominatorTree *DT,
                          LoopInfo *LI, bool PreserveLCSSA) {
  BasicBlock *Latch = L->getLoopLatch();
  assert(Latch && "Loop must have a latch");
  BasicBlock *PrologLatch = cast<BasicBlock>(VMap[Latch]);

  // Create a PHI node for each outgoing value from the original loop
  // (which means it is an outgoing value from the prolog code too).
  // The new PHI node is inserted in the prolog end basic block.
  // The new PHI node value is added as an operand of a PHI node in either
  // the loop header or the loop exit block.
  for (BasicBlock *Succ : successors(Latch)) {
    for (Instruction &BBI : *Succ) {
      PHINode *PN = dyn_cast<PHINode>(&BBI);
      // Exit when we passed all PHI nodes.
      if (!PN)
        break;
      // Add a new PHI node to the prolog end block and add the
      // appropriate incoming values.
      PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
                                       PrologExit->getFirstNonPHI());
      // Adding a value to the new PHI node from the original loop preheader.
      // This is the value that skips all the prolog code.
      if (L->contains(PN)) {
        NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader),
                           PreHeader);
      } else {
        NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader);
      }

      Value *V = PN->getIncomingValueForBlock(Latch);
      if (Instruction *I = dyn_cast<Instruction>(V)) {
        if (L->contains(I)) {
          V = VMap.lookup(I);
        }
      }
      // Adding a value to the new PHI node from the last prolog block
      // that was created.
      NewPN->addIncoming(V, PrologLatch);

      // Update the existing PHI node operand with the value from the
      // new PHI node.  How this is done depends on if the existing
      // PHI node is in the original loop block, or the exit block.
      if (L->contains(PN)) {
        PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN);
      } else {
        PN->addIncoming(NewPN, PrologExit);
      }
    }
  }

  // Make sure that created prolog loop is in simplified form
  SmallVector<BasicBlock *, 4> PrologExitPreds;
  Loop *PrologLoop = LI->getLoopFor(PrologLatch);
  if (PrologLoop) {
    for (BasicBlock *PredBB : predecessors(PrologExit))
      if (PrologLoop->contains(PredBB))
        PrologExitPreds.push_back(PredBB);

    SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
                           PreserveLCSSA);
  }

  // Create a branch around the original loop, which is taken if there are no
  // iterations remaining to be executed after running the prologue.
  Instruction *InsertPt = PrologExit->getTerminator();
  IRBuilder<> B(InsertPt);

  assert(Count != 0 && "nonsensical Count!");

  // If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
  // This means %xtraiter is (BECount + 1) and all of the iterations of this
  // loop were executed by the prologue.  Note that if BECount <u (Count - 1)
  // then (BECount + 1) cannot unsigned-overflow.
  Value *BrLoopExit =
      B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
  // Split the exit to maintain loop canonicalization guarantees
  SmallVector<BasicBlock *, 4> Preds(predecessors(OriginalLoopLatchExit));
  SplitBlockPredecessors(OriginalLoopLatchExit, Preds, ".unr-lcssa", DT, LI,
                         PreserveLCSSA);
  // Add the branch to the exit block (around the unrolled loop)
  B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
  InsertPt->eraseFromParent();
  if (DT)
    DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit);
}
Пример #16
0
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
/// if unrolling was successful, or false if the loop was unmodified. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
///
/// TripCount is generally defined as the number of times the loop header
/// executes. UnrollLoop relaxes the definition to permit early exits: here
/// TripCount is the iteration on which control exits LatchBlock if no early
/// exits were taken. Note that UnrollLoop assumes that the loop counter test
/// terminates LatchBlock in order to remove unnecesssary instances of the
/// test. In other words, control may exit the loop prior to TripCount
/// iterations via an early branch, but control may not exit the loop from the
/// LatchBlock's terminator prior to TripCount iterations.
///
/// Similarly, TripMultiple divides the number of times that the LatchBlock may
/// execute without exiting the loop.
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
/// removed from the LoopPassManager as well. LPM can also be NULL.
///
/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
/// available it must also preserve those analyses.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
                      bool AllowRuntime, unsigned TripMultiple,
                      LoopInfo *LI, LPPassManager *LPM) {
  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) {
    DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");
    return false;
  }

  BasicBlock *LatchBlock = L->getLoopLatch();
  if (!LatchBlock) {
    DEBUG(dbgs() << "  Can't unroll; loop exit-block-insertion failed.\n");
    return false;
  }

  BasicBlock *Header = L->getHeader();
  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());

  if (!BI || BI->isUnconditional()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
             "  Can't unroll; loop not terminated by a conditional branch.\n");
    return false;
  }

  if (Header->hasAddressTaken()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DEBUG(dbgs() <<
          "  Won't unroll loop: address of header block is taken.\n");
    return false;
  }

  if (TripCount != 0)
    DEBUG(dbgs() << "  Trip Count = " << TripCount << "\n");
  if (TripMultiple != 1)
    DEBUG(dbgs() << "  Trip Multiple = " << TripMultiple << "\n");

  // Effectively "DCE" unrolled iterations that are beyond the tripcount
  // and will never be executed.
  if (TripCount != 0 && Count > TripCount)
    Count = TripCount;

  assert(Count > 0);
  assert(TripMultiple > 0);
  assert(TripCount == 0 || TripCount % TripMultiple == 0);

  // Are we eliminating the loop control altogether?
  bool CompletelyUnroll = Count == TripCount;

  // We assume a run-time trip count if the compiler cannot
  // figure out the loop trip count and the unroll-runtime
  // flag is specified.
  bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);

  if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
    return false;

  // Notify ScalarEvolution that the loop will be substantially changed,
  // if not outright eliminated.
  ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
  if (SE)
    SE->forgetLoop(L);

  // If we know the trip count, we know the multiple...
  unsigned BreakoutTrip = 0;
  if (TripCount != 0) {
    BreakoutTrip = TripCount % Count;
    TripMultiple = 0;
  } else {
    // Figure out what multiple to use.
    BreakoutTrip = TripMultiple =
      (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
  }

  if (CompletelyUnroll) {
    DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
          << " with trip count " << TripCount << "!\n");
  } else {
    DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
          << " by " << Count);
    if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
      DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
    } else if (TripMultiple != 1) {
      DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
    } else if (RuntimeTripCount) {
      DEBUG(dbgs() << " with run-time trip count");
    }
    DEBUG(dbgs() << "!\n");
  }

  std::vector<BasicBlock*> LoopBlocks = L->getBlocks();

  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);

  // For the first iteration of the loop, we should use the precloned values for
  // PHI nodes.  Insert associations now.
  ValueToValueMapTy LastValueMap;
  std::vector<PHINode*> OrigPHINode;
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    OrigPHINode.push_back(cast<PHINode>(I));
  }

  std::vector<BasicBlock*> Headers;
  std::vector<BasicBlock*> Latches;
  Headers.push_back(Header);
  Latches.push_back(LatchBlock);

  // The current on-the-fly SSA update requires blocks to be processed in
  // reverse postorder so that LastValueMap contains the correct value at each
  // exit.
  LoopBlocksDFS DFS(L);
  DFS.perform(LI);

  // Stash the DFS iterators before adding blocks to the loop.
  LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();

  for (unsigned It = 1; It != Count; ++It) {
    std::vector<BasicBlock*> NewBlocks;

    for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
      ValueToValueMapTy VMap;
      BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
      Header->getParent()->getBasicBlockList().push_back(New);

      // Loop over all of the PHI nodes in the block, changing them to use the
      // incoming values from the previous block.
      if (*BB == Header)
        for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
          PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
          Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
          if (Instruction *InValI = dyn_cast<Instruction>(InVal))
            if (It > 1 && L->contains(InValI))
              InVal = LastValueMap[InValI];
          VMap[OrigPHINode[i]] = InVal;
          New->getInstList().erase(NewPHI);
        }

      // Update our running map of newest clones
      LastValueMap[*BB] = New;
      for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
           VI != VE; ++VI)
        LastValueMap[VI->first] = VI->second;

      L->addBasicBlockToLoop(New, LI->getBase());

      // Add phi entries for newly created values to all exit blocks.
      for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB);
           SI != SE; ++SI) {
        if (L->contains(*SI))
          continue;
        for (BasicBlock::iterator BBI = (*SI)->begin();
             PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
          Value *Incoming = phi->getIncomingValueForBlock(*BB);
          ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);
          if (It != LastValueMap.end())
            Incoming = It->second;
          phi->addIncoming(Incoming, New);
        }
      }
      // Keep track of new headers and latches as we create them, so that
      // we can insert the proper branches later.
      if (*BB == Header)
        Headers.push_back(New);
      if (*BB == LatchBlock)
        Latches.push_back(New);

      NewBlocks.push_back(New);
    }

    // Remap all instructions in the most recent iteration
    for (unsigned i = 0; i < NewBlocks.size(); ++i)
      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
           E = NewBlocks[i]->end(); I != E; ++I)
        ::RemapInstruction(I, LastValueMap);
  }

  // Loop over the PHI nodes in the original block, setting incoming values.
  for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
    PHINode *PN = OrigPHINode[i];
    if (CompletelyUnroll) {
      PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
      Header->getInstList().erase(PN);
    }
    else if (Count > 1) {
      Value *InVal = PN->removeIncomingValue(LatchBlock, false);
      // If this value was defined in the loop, take the value defined by the
      // last iteration of the loop.
      if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
        if (L->contains(InValI))
          InVal = LastValueMap[InVal];
      }
      assert(Latches.back() == LastValueMap[LatchBlock] && "bad last latch");
      PN->addIncoming(InVal, Latches.back());
    }
  }

  // Now that all the basic blocks for the unrolled iterations are in place,
  // set up the branches to connect them.
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    // The original branch was replicated in each unrolled iteration.
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());

    // The branch destination.
    unsigned j = (i + 1) % e;
    BasicBlock *Dest = Headers[j];
    bool NeedConditional = true;

    if (RuntimeTripCount && j != 0) {
      NeedConditional = false;
    }

    // For a complete unroll, make the last iteration end with a branch
    // to the exit block.
    if (CompletelyUnroll && j == 0) {
      Dest = LoopExit;
      NeedConditional = false;
    }

    // If we know the trip count or a multiple of it, we can safely use an
    // unconditional branch for some iterations.
    if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
      NeedConditional = false;
    }

    if (NeedConditional) {
      // Update the conditional branch's successor for the following
      // iteration.
      Term->setSuccessor(!ContinueOnTrue, Dest);
    } else {
      // Remove phi operands at this loop exit
      if (Dest != LoopExit) {
        BasicBlock *BB = Latches[i];
        for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
             SI != SE; ++SI) {
          if (*SI == Headers[i])
            continue;
          for (BasicBlock::iterator BBI = (*SI)->begin();
               PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) {
            Phi->removeIncomingValue(BB, false);
          }
        }
      }
      // Replace the conditional branch with an unconditional one.
      BranchInst::Create(Dest, Term);
      Term->eraseFromParent();
    }
  }

  // Merge adjacent basic blocks, if possible.
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
    if (Term->isUnconditional()) {
      BasicBlock *Dest = Term->getSuccessor(0);
      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM))
        std::replace(Latches.begin(), Latches.end(), Dest, Fold);
    }
  }

  // FIXME: Reconstruct dom info, because it is not preserved properly.
  // Incrementally updating domtree after loop unrolling would be easy.
  if (DominatorTree *DT = LPM->getAnalysisIfAvailable<DominatorTree>())
    DT->runOnFunction(*L->getHeader()->getParent());

  // Simplify any new induction variables in the partially unrolled loop.
  if (SE && !CompletelyUnroll) {
    SmallVector<WeakVH, 16> DeadInsts;
    simplifyLoopIVs(L, SE, LPM, DeadInsts);

    // Aggressively clean up dead instructions that simplifyLoopIVs already
    // identified. Any remaining should be cleaned up below.
    while (!DeadInsts.empty())
      if (Instruction *Inst =
          dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val()))
        RecursivelyDeleteTriviallyDeadInstructions(Inst);
  }

  // At this point, the code is well formed.  We now do a quick sweep over the
  // inserted code, doing constant propagation and dead code elimination as we
  // go.
  const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
  for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
       BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
      Instruction *Inst = I++;

      if (isInstructionTriviallyDead(Inst))
        (*BB)->getInstList().erase(Inst);
      else if (Value *V = SimplifyInstruction(Inst))
        if (LI->replacementPreservesLCSSAForm(Inst, V)) {
          Inst->replaceAllUsesWith(V);
          (*BB)->getInstList().erase(Inst);
        }
    }

  NumCompletelyUnrolled += CompletelyUnroll;
  ++NumUnrolled;
  // Remove the loop from the LoopPassManager if it's completely removed.
  if (CompletelyUnroll && LPM != NULL)
    LPM->deleteLoopFromQueue(L);

  return true;
}
Пример #17
0
/// optimizeCheck - replace the given check CallInst with the check's fast
/// version if all the source memory objects can be found and it is obvious
/// that none of them have been freed at the point where the check is made.
/// Returns the new call if possible and NULL otherwise.
///
/// This currently works only with memory objects that can't be freed:
/// * global variables
/// * allocas that trivially have function scope
/// * byval arguments
///
bool ExactCheckOpt::optimizeCheck(CallInst *CI, CheckInfoType *Info) {
  // Examined values
  SmallSet<Value*, 16> Visited;
  // Potential memory objects
  SmallSet<Value*, 4> Objects;

  std::queue<Value*> Q;
  // Start from the the pointer operand
  Value *StartPtr = CI->getArgOperand(Info->PtrArgNo)->stripPointerCasts();
  Q.push(StartPtr);

  // Use BFS to find all potential memory objects
  while(!Q.empty()) {
    Value *o = Q.front()->stripPointerCasts();
    Q.pop();
    if(Visited.count(o))
      continue;
    Visited.insert(o);

    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(o)) {
      if (CE->getOpcode() == Instruction::GetElementPtr) {
        Q.push(CE->getOperand(0));
      } else {
        // Exit early if any of the objects are unsupported.
        if (!isSimpleMemoryObject(o))
          return false;
        Objects.insert(o);
      }
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(o)) {
      Q.push(GEP->getPointerOperand());
      // It is fine to ignore the case of indexing into null with a pointer
      // because that case is invalid for LLVM-aware objects such as allocas,
      // globals, and objects pointed to by noalias pointers.
    } else if(PHINode *PHI = dyn_cast<PHINode>(o)) {
      for (unsigned i = 0, num = PHI->getNumIncomingValues(); i != num; ++i)
        Q.push(PHI->getIncomingValue(i));
    } else if (SelectInst *SI = dyn_cast<SelectInst>(o)) {
      Q.push(SI->getTrueValue());
      Q.push(SI->getFalseValue());
    } else {
      // Exit early if any of the objects are unsupported.
      if (!isSimpleMemoryObject(o))
        return false;
      Objects.insert(o);
    }
  }

  // Mapping from the initial value to the corresponding size and void pointer:
  // * memory object -> its size and pointer
  // * phi/select -> corresponding phi/select for the sizes and pointers
  // * anything else -> the corresponding size and pointer on the path
  std::map <Value*, PtrSizePair> M;

  Module &Mod = *CI->getParent()->getParent()->getParent();
  Type *SizeTy = getSizeType(Info, Mod);

  // Add non-instruction non-constant allocation object pointers to the front
  // of the function's entry block.
  BasicBlock &EntryBlock = CI->getParent()->getParent()->getEntryBlock();
  Instruction *FirstInsertionPoint = ++BasicBlock::iterator(EntryBlock.begin());

  for (SmallSet<Value*, 16>::const_iterator It = Objects.begin(),
       E = Objects.end();
       It != E;
       ++It) {
    // Obj is a memory object pointer: alloca, argument, load, callinst, etc.
    Value *Obj = *It;

    // Insert instruction-based allocation pointers just after the allocation.
    Instruction *InsertBefore = FirstInsertionPoint;
    if (Instruction *I = dyn_cast<Instruction>(Obj))
      InsertBefore = ++BasicBlock::iterator(I);
    IRBuilder<> Builder(InsertBefore);

    SizeOffsetEvalType SizeOffset = ObjSizeEval->compute(Obj);
    assert(ObjSizeEval->bothKnown(SizeOffset));
    assert(dyn_cast<ConstantInt>(SizeOffset.second)->isZero());
    Value *Size = Builder.CreateIntCast(SizeOffset.first, SizeTy,
                                        /*isSigned=*/false);

    Value *Ptr = Builder.CreatePointerCast(Obj, VoidPtrTy);
    M[Obj] = std::make_pair(Ptr, Size);
  }

  // Create the rest of the size values and object pointers.
  // The phi nodes will be finished later.
  for (SmallSet<Value*, 16>::const_iterator I = Visited.begin(),
       E = Visited.end();
       I != E;
       ++I) {
    getPtrAndSize(*I, SizeTy, M);
  }

  // Finalize the phi nodes.
  for (SmallSet<Value*, 16>::const_iterator I = Visited.begin(),
       E = Visited.end();
       I != E;
       ++I) {
    if (PHINode *PHI = dyn_cast<PHINode>(*I)) {
      assert(M.count(PHI));
      PHINode *PtrPHI = cast<PHINode>(M[PHI].first);
      PHINode *SizePHI = cast<PHINode>(M[PHI].second);

      for(unsigned i = 0, num = PHI->getNumIncomingValues(); i != num; ++i) {
        Value *IncomingValue = PHI->getIncomingValue(i)->stripPointerCasts();
        assert(M.count(IncomingValue));

        PtrPHI->addIncoming(M[IncomingValue].first, PHI->getIncomingBlock(i));
        SizePHI->addIncoming(M[IncomingValue].second, PHI->getIncomingBlock(i));
      }
    }
  }

  // Insert the fast version of the check just before the regular version.
  assert(M.count(StartPtr) && "The memory object and its size should be known");
  createFastCheck(Info, CI, M[StartPtr].first, M[StartPtr].second);
  return true;
}
Пример #18
0
Value *ForExprAST::Codegen() {
  // Output this as:
  //   ...
  //   start = startexpr
  //   goto loop
  // loop: 
  //   variable = phi [start, loopheader], [nextvariable, loopend]
  //   ...
  //   bodyexpr
  //   ...
  // loopend:
  //   step = stepexpr
  //   nextvariable = variable + step
  //   endcond = endexpr
  //   br endcond, loop, endloop
  // outloop:
  
  // Emit the start code first, without 'variable' in scope.
  Value *StartVal = Start->Codegen();
  if (StartVal == 0) return 0;
  
  // Make the new basic block for the loop header, inserting after current
  // block.
  Function *TheFunction = Builder.GetInsertBlock()->getParent();
  BasicBlock *PreheaderBB = Builder.GetInsertBlock();
  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
  
  // Insert an explicit fall through from the current block to the LoopBB.
  Builder.CreateBr(LoopBB);

  // Start insertion in LoopBB.
  Builder.SetInsertPoint(LoopBB);
  
  // Start the PHI node with an entry for Start.
  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str());
  Variable->addIncoming(StartVal, PreheaderBB);
  
  // Within the loop, the variable is defined equal to the PHI node.  If it
  // shadows an existing variable, we have to restore it, so save it now.
  Value *OldVal = NamedValues[VarName];
  NamedValues[VarName] = Variable;
  
  // Emit the body of the loop.  This, like any other expr, can change the
  // current BB.  Note that we ignore the value computed by the body, but don't
  // allow an error.
  if (Body->Codegen() == 0)
    return 0;
  
  // Emit the step value.
  Value *StepVal;
  if (Step) {
    StepVal = Step->Codegen();
    if (StepVal == 0) return 0;
  } else {
    // If not specified, use 1.0.
    StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
  }
  
  Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");

  // Compute the end condition.
  Value *EndCond = End->Codegen();
  if (EndCond == 0) return EndCond;
  
  // Convert condition to a bool by comparing equal to 0.0.
  EndCond = Builder.CreateFCmpONE(EndCond, 
                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
                                  "loopcond");
  
  // Create the "after loop" block and insert it.
  BasicBlock *LoopEndBB = Builder.GetInsertBlock();
  BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
  
  // Insert the conditional branch into the end of LoopEndBB.
  Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
  
  // Any new code will be inserted in AfterBB.
  Builder.SetInsertPoint(AfterBB);
  
  // Add a new entry to the PHI node for the backedge.
  Variable->addIncoming(NextVar, LoopEndBB);
  
  // Restore the unshadowed variable.
  if (OldVal)
    NamedValues[VarName] = OldVal;
  else
    NamedValues.erase(VarName);

  
  // for expr always returns 0.0.
  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
}
Пример #19
0
/// Connect the unrolling prolog code to the original loop.
/// The unrolling prolog code contains code to execute the
/// 'extra' iterations if the run-time trip count modulo the
/// unroll count is non-zero.
///
/// This function performs the following:
/// - Create PHI nodes at prolog end block to combine values
///   that exit the prolog code and jump around the prolog.
/// - Add a PHI operand to a PHI node at the loop exit block
///   for values that exit the prolog and go around the loop.
/// - Branch around the original loop if the trip count is less
///   than the unroll factor.
///
static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
                          BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
                          BasicBlock *OrigPH, BasicBlock *NewPH,
                          ValueToValueMapTy &VMap, AliasAnalysis *AA,
                          DominatorTree *DT, LoopInfo *LI, Pass *P) {
    BasicBlock *Latch = L->getLoopLatch();
    assert(Latch && "Loop must have a latch");

    // Create a PHI node for each outgoing value from the original loop
    // (which means it is an outgoing value from the prolog code too).
    // The new PHI node is inserted in the prolog end basic block.
    // The new PHI name is added as an operand of a PHI node in either
    // the loop header or the loop exit block.
    for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch);
            SBI != SBE; ++SBI) {
        for (BasicBlock::iterator BBI = (*SBI)->begin();
                PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {

            // Add a new PHI node to the prolog end block and add the
            // appropriate incoming values.
            PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr",
                                             PrologEnd->getTerminator());
            // Adding a value to the new PHI node from the original loop preheader.
            // This is the value that skips all the prolog code.
            if (L->contains(PN)) {
                NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH);
            } else {
                NewPN->addIncoming(UndefValue::get(PN->getType()), OrigPH);
            }

            Value *V = PN->getIncomingValueForBlock(Latch);
            if (Instruction *I = dyn_cast<Instruction>(V)) {
                if (L->contains(I)) {
                    V = VMap[I];
                }
            }
            // Adding a value to the new PHI node from the last prolog block
            // that was created.
            NewPN->addIncoming(V, LastPrologBB);

            // Update the existing PHI node operand with the value from the
            // new PHI node.  How this is done depends on if the existing
            // PHI node is in the original loop block, or the exit block.
            if (L->contains(PN)) {
                PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN);
            } else {
                PN->addIncoming(NewPN, PrologEnd);
            }
        }
    }

    // Create a branch around the orignal loop, which is taken if there are no
    // iterations remaining to be executed after running the prologue.
    Instruction *InsertPt = PrologEnd->getTerminator();
    IRBuilder<> B(InsertPt);

    assert(Count != 0 && "nonsensical Count!");

    // If BECount <u (Count - 1) then (BECount + 1) & (Count - 1) == (BECount + 1)
    // (since Count is a power of 2).  This means %xtraiter is (BECount + 1) and
    // and all of the iterations of this loop were executed by the prologue.  Note
    // that if BECount <u (Count - 1) then (BECount + 1) cannot unsigned-overflow.
    Value *BrLoopExit =
        B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
    BasicBlock *Exit = L->getUniqueExitBlock();
    assert(Exit && "Loop must have a single exit block only");
    // Split the exit to maintain loop canonicalization guarantees
    SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
    SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", AA, DT, LI,
                           P->mustPreserveAnalysisID(LCSSAID));
    // Add the branch to the exit block (around the unrolled loop)
    B.CreateCondBr(BrLoopExit, Exit, NewPH);
    InsertPt->eraseFromParent();
}
Пример #20
0
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
                                      bool AllowExpensiveTripCount,
                                      bool UseEpilogRemainder,
                                      LoopInfo *LI, ScalarEvolution *SE,
                                      DominatorTree *DT, bool PreserveLCSSA) {
  // for now, only unroll loops that contain a single exit
  if (!L->getExitingBlock())
    return false;

  // Make sure the loop is in canonical form, and there is a single
  // exit block only.
  if (!L->isLoopSimplifyForm())
    return false;
  BasicBlock *Exit = L->getUniqueExitBlock(); // successor out of loop
  if (!Exit)
    return false;

  // Use Scalar Evolution to compute the trip count. This allows more loops to
  // be unrolled than relying on induction var simplification.
  if (!SE)
    return false;

  // Only unroll loops with a computable trip count, and the trip count needs
  // to be an int value (allowing a pointer type is a TODO item).
  const SCEV *BECountSC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BECountSC) ||
      !BECountSC->getType()->isIntegerTy())
    return false;

  unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();

  // Add 1 since the backedge count doesn't include the first loop iteration.
  const SCEV *TripCountSC =
      SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
  if (isa<SCEVCouldNotCompute>(TripCountSC))
    return false;

  BasicBlock *Header = L->getHeader();
  BasicBlock *PreHeader = L->getLoopPreheader();
  BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
  const DataLayout &DL = Header->getModule()->getDataLayout();
  SCEVExpander Expander(*SE, DL, "loop-unroll");
  if (!AllowExpensiveTripCount &&
      Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR))
    return false;

  // This constraint lets us deal with an overflowing trip count easily; see the
  // comment on ModVal below.
  if (Log2_32(Count) > BEWidth)
    return false;

  BasicBlock *Latch = L->getLoopLatch();

  // Loop structure is the following:
  //
  // PreHeader
  //   Header
  //   ...
  //   Latch
  // Exit

  BasicBlock *NewPreHeader;
  BasicBlock *NewExit = nullptr;
  BasicBlock *PrologExit = nullptr;
  BasicBlock *EpilogPreHeader = nullptr;
  BasicBlock *PrologPreHeader = nullptr;

  if (UseEpilogRemainder) {
    // If epilog remainder
    // Split PreHeader to insert a branch around loop for unrolling.
    NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
    NewPreHeader->setName(PreHeader->getName() + ".new");
    // Split Exit to create phi nodes from branch above.
    SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
    NewExit = SplitBlockPredecessors(Exit, Preds, ".unr-lcssa",
                                     DT, LI, PreserveLCSSA);
    // Split NewExit to insert epilog remainder loop.
    EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
    EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
  } else {
    // If prolog remainder
    // Split the original preheader twice to insert prolog remainder loop
    PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
    PrologPreHeader->setName(Header->getName() + ".prol.preheader");
    PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
                            DT, LI);
    PrologExit->setName(Header->getName() + ".prol.loopexit");
    // Split PrologExit to get NewPreHeader.
    NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
    NewPreHeader->setName(PreHeader->getName() + ".new");
  }
  // Loop structure should be the following:
  //  Epilog             Prolog
  //
  // PreHeader         PreHeader
  // *NewPreHeader     *PrologPreHeader
  //   Header          *PrologExit
  //   ...             *NewPreHeader
  //   Latch             Header
  // *NewExit            ...
  // *EpilogPreHeader    Latch
  // Exit              Exit

  // Calculate conditions for branch around loop for unrolling
  // in epilog case and around prolog remainder loop in prolog case.
  // Compute the number of extra iterations required, which is:
  //  extra iterations = run-time trip count % loop unroll factor
  PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
  Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
                                            PreHeaderBR);
  Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
                                          PreHeaderBR);
  IRBuilder<> B(PreHeaderBR);
  Value *ModVal;
  // Calculate ModVal = (BECount + 1) % Count.
  // Note that TripCount is BECount + 1.
  if (isPowerOf2_32(Count)) {
    // When Count is power of 2 we don't BECount for epilog case, however we'll
    // need it for a branch around unrolling loop for prolog case.
    ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
    //  1. There are no iterations to be run in the prolog/epilog loop.
    // OR
    //  2. The addition computing TripCount overflowed.
    //
    // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
    // the number of iterations that remain to be run in the original loop is a
    // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
    // explicitly check this above).
  } else {
    // As (BECount + 1) can potentially unsigned overflow we count
    // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
    Value *ModValTmp = B.CreateURem(BECount,
                                    ConstantInt::get(BECount->getType(),
                                                     Count));
    Value *ModValAdd = B.CreateAdd(ModValTmp,
                                   ConstantInt::get(ModValTmp->getType(), 1));
    // At that point (BECount % Count) + 1 could be equal to Count.
    // To handle this case we need to take mod by Count one more time.
    ModVal = B.CreateURem(ModValAdd,
                          ConstantInt::get(BECount->getType(), Count),
                          "xtraiter");
  }
  Value *BranchVal =
      UseEpilogRemainder ? B.CreateICmpULT(BECount,
                                           ConstantInt::get(BECount->getType(),
                                                            Count - 1)) :
                           B.CreateIsNotNull(ModVal, "lcmp.mod");
  BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
  BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
  // Branch to either remainder (extra iterations) loop or unrolling loop.
  B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
  PreHeaderBR->eraseFromParent();
  Function *F = Header->getParent();
  // Get an ordered list of blocks in the loop to help with the ordering of the
  // cloned blocks in the prolog/epilog code
  LoopBlocksDFS LoopBlocks(L);
  LoopBlocks.perform(LI);

  //
  // For each extra loop iteration, create a copy of the loop's basic blocks
  // and generate a condition that branches to the copy depending on the
  // number of 'left over' iterations.
  //
  std::vector<BasicBlock *> NewBlocks;
  ValueToValueMapTy VMap;

  // For unroll factor 2 remainder loop will have 1 iterations.
  // Do not create 1 iteration loop.
  bool CreateRemainderLoop = (Count != 2);

  // Clone all the basic blocks in the loop. If Count is 2, we don't clone
  // the loop, otherwise we create a cloned loop to execute the extra
  // iterations. This function adds the appropriate CFG connections.
  BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit;
  BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
  CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop,
                  InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI);

  // Insert the cloned blocks into the function.
  F->getBasicBlockList().splice(InsertBot->getIterator(),
                                F->getBasicBlockList(),
                                NewBlocks[0]->getIterator(),
                                F->end());

  // Loop structure should be the following:
  //  Epilog             Prolog
  //
  // PreHeader         PreHeader
  // NewPreHeader      PrologPreHeader
  //   Header            PrologHeader
  //   ...               ...
  //   Latch             PrologLatch
  // NewExit           PrologExit
  // EpilogPreHeader   NewPreHeader
  //   EpilogHeader      Header
  //   ...               ...
  //   EpilogLatch       Latch
  // Exit              Exit

  // Rewrite the cloned instruction operands to use the values created when the
  // clone is created.
  for (BasicBlock *BB : NewBlocks) {
    for (Instruction &I : *BB) {
      RemapInstruction(&I, VMap,
                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
    }
  }

  if (UseEpilogRemainder) {
    // Connect the epilog code to the original loop and update the
    // PHI functions.
    ConnectEpilog(L, ModVal, NewExit, Exit, PreHeader,
                  EpilogPreHeader, NewPreHeader, VMap, DT, LI,
                  PreserveLCSSA);

    // Update counter in loop for unrolling.
    // I should be multiply of Count.
    IRBuilder<> B2(NewPreHeader->getTerminator());
    Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
    BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
    B2.SetInsertPoint(LatchBR);
    PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
                                      Header->getFirstNonPHI());
    Value *IdxSub =
        B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
                     NewIdx->getName() + ".nsub");
    Value *IdxCmp;
    if (LatchBR->getSuccessor(0) == Header)
      IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
    else
      IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
    NewIdx->addIncoming(TestVal, NewPreHeader);
    NewIdx->addIncoming(IdxSub, Latch);
    LatchBR->setCondition(IdxCmp);
  } else {
    // Connect the prolog code to the original loop and update the
    // PHI functions.
    ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader,
                  VMap, DT, LI, PreserveLCSSA);
  }

  // If this loop is nested, then the loop unroller changes the code in the
  // parent loop, so the Scalar Evolution pass needs to be run again.
  if (Loop *ParentLoop = L->getParentLoop())
    SE->forgetLoop(ParentLoop);

  NumRuntimeUnrolled++;
  return true;
}
Пример #21
0
void IrGen::visit(AstOperator& op) {
  const auto& astOperands = op.args().childs();
  Value* llvmResult = nullptr;

  // unary non-arithmetic operators
  if (op.op() == AstOperator::eNot) {
    llvmResult = m_builder.CreateNot(callAcceptOn(*astOperands.front()), "not");
  }
  else if (op.op() == AstOperator::eAddrOf) {
    astOperands.front()->accept(*this);
    llvmResult = astOperands.front()->ir().irAddrOfIrObject();
  }
  else if (op.op() == AstOperator::eDeref) {
    op.ir().setAddrOfIrObject(
      callAcceptOn(*astOperands.front()), EInitStatus::eInitialized);
  }

  // binary logical short circuit operators
  else if (op.isBinaryLogicalShortCircuit()) {
    const auto opname =
      op.op() == AstOperator::eAnd ? string{"and"} : string{"or"};
    Function* functionIr = m_builder.GetInsertBlock()->getParent();
    BasicBlock* rhsBB = BasicBlock::Create(llvmContext, opname + "_rhs");
    BasicBlock* mergeBB = BasicBlock::Create(llvmContext, opname + "_merge");

    // current/lhs BB:
    auto llvmLhs = callAcceptOn(*astOperands.front());
    assert(llvmLhs);
    if (op.op() == AstOperator::eAnd) {
      m_builder.CreateCondBr(llvmLhs, rhsBB, mergeBB);
    }
    else if (op.op() == AstOperator::eOr) {
      m_builder.CreateCondBr(llvmLhs, mergeBB, rhsBB);
    }
    else {
      assert(false);
    }
    BasicBlock* lhsLastBB = m_builder.GetInsertBlock();

    // rhsBB:
    functionIr->getBasicBlockList().push_back(rhsBB);
    m_builder.SetInsertPoint(rhsBB);
    auto llvmRhs = callAcceptOn(*astOperands.back());
    m_builder.CreateBr(mergeBB);
    BasicBlock* rhsLastBB = m_builder.GetInsertBlock();

    // mergeBB:
    functionIr->getBasicBlockList().push_back(mergeBB);
    m_builder.SetInsertPoint(mergeBB);
    PHINode* phi = m_builder.CreatePHI(Type::getInt1Ty(llvmContext), 2, opname);
    assert(phi);
    phi->addIncoming(llvmLhs, lhsLastBB);
    phi->addIncoming(llvmRhs, rhsLastBB);
    llvmResult = phi;
  }

  // assignment operators
  else if (op.op() == AstOperator::eAssign ||
    op.op() == AstOperator::eVoidAssign) {
    astOperands.front()->accept(*this);
    auto llvmRhs = callAcceptOn(*astOperands.back());
    m_builder.CreateStore(
      llvmRhs, astOperands.front()->ir().irAddrOfIrObject());
    if (op.op() == AstOperator::eVoidAssign) {
      llvmResult = m_abstractObject; // void
    }
    else {
      // op.object() is the same as astOperands.front().object(), so
      // 'returning the result' is now a nop.
    }
  }

  // binary arithmetic operators
  else if (astOperands.size() == 2) {
    auto llvmLhs = callAcceptOn(*astOperands.front());
    auto llvmRhs = callAcceptOn(*astOperands.back());
    if (astOperands.front()->objType().is(ObjType::eStoredAsIntegral)) {
      switch (op.op()) {
        // clang-format off
      case AstOperator::eSub      : llvmResult = m_builder.CreateSub   (llvmLhs, llvmRhs, "sub"); break;
      case AstOperator::eAdd      : llvmResult = m_builder.CreateAdd   (llvmLhs, llvmRhs, "add"); break;
      case AstOperator::eMul      : llvmResult = m_builder.CreateMul   (llvmLhs, llvmRhs, "mul"); break;
      case AstOperator::eDiv      : llvmResult = m_builder.CreateSDiv  (llvmLhs, llvmRhs, "div"); break;
      case AstOperator::eEqualTo  : llvmResult = m_builder.CreateICmpEQ(llvmLhs, llvmRhs, "cmp"); break;
      default: assert(false);
        // clang-format on
      }
    }
    else {
      switch (op.op()) {
        // clang-format off
      case AstOperator::eSub      : llvmResult = m_builder.CreateFSub   (llvmLhs, llvmRhs, "fsub"); break;
      case AstOperator::eAdd      : llvmResult = m_builder.CreateFAdd   (llvmLhs, llvmRhs, "add"); break;
      case AstOperator::eMul      : llvmResult = m_builder.CreateFMul   (llvmLhs, llvmRhs, "mul"); break;
      case AstOperator::eDiv      : llvmResult = m_builder.CreateFDiv   (llvmLhs, llvmRhs, "div"); break;
      case AstOperator::eEqualTo  : llvmResult = m_builder.CreateFCmpOEQ(llvmLhs, llvmRhs, "cmp"); break;
      default: assert(false);
        // clang-format on
      }
    }
    assert(llvmResult);
  }

  // unary arithmetic operators
  else {
    assert(astOperands.size() == 1);
    const auto& operand = astOperands.front();
    const auto& objType = operand->objType();
    auto llvmOperand = callAcceptOn(*operand);

    if (objType.is(ObjType::eStoredAsIntegral)) {
      auto llvmZero = ConstantInt::get(llvmContext, APInt(objType.size(), 0));
      switch (op.op()) {
      case '-':
        llvmResult = m_builder.CreateSub(llvmZero, llvmOperand, "neg");
        break;
      case '+': llvmResult = llvmOperand; break;
      default: assert(false);
      }
    }
    else {
      auto llvmZero = ConstantFP::get(llvmContext, APFloat(0.0));
      switch (op.op()) {
      case '-':
        llvmResult = m_builder.CreateFSub(llvmZero, llvmOperand, "fneg");
        break;
      case '+': llvmResult = llvmOperand; break;
      default: assert(false);
      }
    }
    assert(llvmResult);
  }

  if (llvmResult) { allocateAndInitLocalIrObjectFor(op, llvmResult); }
}
Пример #22
0
/// SplitBlockPredecessors - This method transforms BB by introducing a new
/// basic block into the function, and moving some of the predecessors of BB to
/// be predecessors of the new block.  The new predecessors are indicated by the
/// Preds array, which has NumPreds elements in it.  The new block is given a
/// suffix of 'Suffix'.
///
/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses.
/// In particular, it does not preserve LoopSimplify (because it's
/// complicated to handle the case where one of the edges being split
/// is an exit of a loop with other exits).
///
BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, 
                                         BasicBlock *const *Preds,
                                         unsigned NumPreds, const char *Suffix,
                                         Pass *P) {
  // Create new basic block, insert right before the original block.
  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
                                         BB->getParent(), BB);
  
  // The new block unconditionally branches to the old block.
  BranchInst *BI = BranchInst::Create(BB, NewBB);
  
  LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0;
  Loop *L = LI ? LI->getLoopFor(BB) : 0;
  bool PreserveLCSSA = P->mustPreserveAnalysisID(LCSSAID);

  // Move the edges from Preds to point to NewBB instead of BB.
  // While here, if we need to preserve loop analyses, collect
  // some information about how this split will affect loops.
  bool HasLoopExit = false;
  bool IsLoopEntry = !!L;
  bool SplitMakesNewLoopHeader = false;
  for (unsigned i = 0; i != NumPreds; ++i) {
    // This is slightly more strict than necessary; the minimum requirement
    // is that there be no more than one indirectbr branching to BB. And
    // all BlockAddress uses would need to be updated.
    assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
           "Cannot split an edge from an IndirectBrInst");

    Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);

    if (LI) {
      // If we need to preserve LCSSA, determine if any of
      // the preds is a loop exit.
      if (PreserveLCSSA)
        if (Loop *PL = LI->getLoopFor(Preds[i]))
          if (!PL->contains(BB))
            HasLoopExit = true;
      // If we need to preserve LoopInfo, note whether any of the
      // preds crosses an interesting loop boundary.
      if (L) {
        if (L->contains(Preds[i]))
          IsLoopEntry = false;
        else
          SplitMakesNewLoopHeader = true;
      }
    }
  }

  // Update dominator tree and dominator frontier if available.
  DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0;
  if (DT)
    DT->splitBlock(NewBB);
  if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0)
    DF->splitBlock(NewBB);

  // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
  // node becomes an incoming value for BB's phi node.  However, if the Preds
  // list is empty, we need to insert dummy entries into the PHI nodes in BB to
  // account for the newly created predecessor.
  if (NumPreds == 0) {
    // Insert dummy values as the incoming value.
    for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
      cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
    return NewBB;
  }

  AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0;

  if (L) {
    if (IsLoopEntry) {
      // Add the new block to the nearest enclosing loop (and not an
      // adjacent loop). To find this, examine each of the predecessors and
      // determine which loops enclose them, and select the most-nested loop
      // which contains the loop containing the block being split.
      Loop *InnermostPredLoop = 0;
      for (unsigned i = 0; i != NumPreds; ++i)
        if (Loop *PredLoop = LI->getLoopFor(Preds[i])) {
          // Seek a loop which actually contains the block being split (to
          // avoid adjacent loops).
          while (PredLoop && !PredLoop->contains(BB))
            PredLoop = PredLoop->getParentLoop();
          // Select the most-nested of these loops which contains the block.
          if (PredLoop &&
              PredLoop->contains(BB) &&
              (!InnermostPredLoop ||
               InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth()))
            InnermostPredLoop = PredLoop;
        }
      if (InnermostPredLoop)
        InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase());
    } else {
      L->addBasicBlockToLoop(NewBB, LI->getBase());
      if (SplitMakesNewLoopHeader)
        L->moveToHeader(NewBB);
    }
  }
  
  // Otherwise, create a new PHI node in NewBB for each PHI node in BB.
  for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) {
    PHINode *PN = cast<PHINode>(I++);
    
    // Check to see if all of the values coming in are the same.  If so, we
    // don't need to create a new PHI node, unless it's needed for LCSSA.
    Value *InVal = 0;
    if (!HasLoopExit) {
      InVal = PN->getIncomingValueForBlock(Preds[0]);
      for (unsigned i = 1; i != NumPreds; ++i)
        if (InVal != PN->getIncomingValueForBlock(Preds[i])) {
          InVal = 0;
          break;
        }
    }

    if (InVal) {
      // If all incoming values for the new PHI would be the same, just don't
      // make a new PHI.  Instead, just remove the incoming values from the old
      // PHI.
      for (unsigned i = 0; i != NumPreds; ++i)
        PN->removeIncomingValue(Preds[i], false);
    } else {
      // If the values coming into the block are not the same, we need a PHI.
      // Create the new PHI node, insert it into NewBB at the end of the block
      PHINode *NewPHI =
        PHINode::Create(PN->getType(), PN->getName()+".ph", BI);
      if (AA) AA->copyValue(PN, NewPHI);
      
      // Move all of the PHI values for 'Preds' to the new PHI.
      for (unsigned i = 0; i != NumPreds; ++i) {
        Value *V = PN->removeIncomingValue(Preds[i], false);
        NewPHI->addIncoming(V, Preds[i]);
      }
      InVal = NewPHI;
    }
    
    // Add an incoming value to the PHI node in the loop for the preheader
    // edge.
    PN->addIncoming(InVal, NewBB);
  }
  
  return NewBB;
}
Пример #23
0
/// Renames all variables in the specified BasicBlock.
/// Only variables that need to be rename will be.
///
void SSI::rename(BasicBlock *BB) {
  SmallPtrSet<Instruction*, 8> defined;

  // Iterate through instructions and make appropriate renaming.
  // For SSI_PHI (b = PHI()), store b at value_stack as a new
  // definition of the variable it represents.
  // For SSI_SIG (b = PHI(a)), substitute a with the current
  // value of a, present in the value_stack.
  // Then store bin the value_stack as the new definition of a.
  // For all other instructions (b = OP(a, c, d, ...)), we need to substitute
  // all operands with its current value, present in value_stack.
  for (BasicBlock::iterator begin = BB->begin(), end = BB->end();
       begin != end; ++begin) {
    Instruction *I = begin;
    if (PHINode *PN = dyn_cast<PHINode>(I)) { // Treat PHI functions
      Instruction* position;

      // Treat SSI_PHI
      if ((position = getPositionPhi(PN))) {
        value_stack[position].push_back(PN);
        defined.insert(position);
      // Treat SSI_SIG
      } else if ((position = getPositionSigma(PN))) {
        substituteUse(I);
        value_stack[position].push_back(PN);
        defined.insert(position);
      }

      // Treat all other PHI functions
      else {
        substituteUse(I);
      }
    }

    // Treat all other functions
    else {
      substituteUse(I);
    }
  }

  // This loop iterates in all BasicBlocks that are successors of the current
  // BasicBlock. For each SSI_PHI instruction found, insert an operand.
  // This operand is the current operand in value_stack for the variable
  // in "position". And the BasicBlock this operand represents is the current
  // BasicBlock.
  for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) {
    BasicBlock *BB_succ = *SI;

    for (BasicBlock::iterator begin = BB_succ->begin(),
         notPhi = BB_succ->getFirstNonPHI(); begin != *notPhi; ++begin) {
      Instruction *I = begin;
      PHINode *PN = dyn_cast<PHINode>(I);
      Instruction* position;
      if (PN && ((position = getPositionPhi(PN)))) {
        PN->addIncoming(value_stack[position].back(), BB);
      }
    }
  }

  // This loop calls rename on all children from this block. This time children
  // refers to a successor block in the dominance tree.
  DomTreeNode *DTN = DT_->getNode(BB);
  for (DomTreeNode::iterator begin = DTN->begin(), end = DTN->end();
       begin != end; ++begin) {
    DomTreeNodeBase<BasicBlock> *DTN_children = *begin;
    BasicBlock *BB_children = DTN_children->getBlock();
    rename(BB_children);
  }

  // Now we remove all inserted definitions of a variable from the top of
  // the stack leaving the previous one as the top.
  for (SmallPtrSet<Instruction*, 8>::iterator DI = defined.begin(),
       DE = defined.end(); DI != DE; ++DI)
    value_stack[*DI].pop_back();
}
Пример #24
0
/// Connect the unrolling epilog code to the original loop.
/// The unrolling epilog code contains code to execute the
/// 'extra' iterations if the run-time trip count modulo the
/// unroll count is non-zero.
///
/// This function performs the following:
/// - Update PHI nodes at the unrolling loop exit and epilog loop exit
/// - Create PHI nodes at the unrolling loop exit to combine
///   values that exit the unrolling loop code and jump around it.
/// - Update PHI operands in the epilog loop by the new PHI nodes
/// - Branch around the epilog loop if extra iters (ModVal) is zero.
///
static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
                          BasicBlock *Exit, BasicBlock *PreHeader,
                          BasicBlock *EpilogPreHeader, BasicBlock *NewPreHeader,
                          ValueToValueMapTy &VMap, DominatorTree *DT,
                          LoopInfo *LI, bool PreserveLCSSA)  {
  BasicBlock *Latch = L->getLoopLatch();
  assert(Latch && "Loop must have a latch");
  BasicBlock *EpilogLatch = cast<BasicBlock>(VMap[Latch]);

  // Loop structure should be the following:
  //
  // PreHeader
  // NewPreHeader
  //   Header
  //   ...
  //   Latch
  // NewExit (PN)
  // EpilogPreHeader
  //   EpilogHeader
  //   ...
  //   EpilogLatch
  // Exit (EpilogPN)

  // Update PHI nodes at NewExit and Exit.
  for (Instruction &BBI : *NewExit) {
    PHINode *PN = dyn_cast<PHINode>(&BBI);
    // Exit when we passed all PHI nodes.
    if (!PN)
      break;
    // PN should be used in another PHI located in Exit block as
    // Exit was split by SplitBlockPredecessors into Exit and NewExit
    // Basicaly it should look like:
    // NewExit:
    //   PN = PHI [I, Latch]
    // ...
    // Exit:
    //   EpilogPN = PHI [PN, EpilogPreHeader]
    //
    // There is EpilogPreHeader incoming block instead of NewExit as
    // NewExit was spilt 1 more time to get EpilogPreHeader.
    assert(PN->hasOneUse() && "The phi should have 1 use");
    PHINode *EpilogPN = cast<PHINode> (PN->use_begin()->getUser());
    assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");

    // Add incoming PreHeader from branch around the Loop
    PN->addIncoming(UndefValue::get(PN->getType()), PreHeader);

    Value *V = PN->getIncomingValueForBlock(Latch);
    Instruction *I = dyn_cast<Instruction>(V);
    if (I && L->contains(I))
      // If value comes from an instruction in the loop add VMap value.
      V = VMap.lookup(I);
    // For the instruction out of the loop, constant or undefined value
    // insert value itself.
    EpilogPN->addIncoming(V, EpilogLatch);

    assert(EpilogPN->getBasicBlockIndex(EpilogPreHeader) >= 0 &&
          "EpilogPN should have EpilogPreHeader incoming block");
    // Change EpilogPreHeader incoming block to NewExit.
    EpilogPN->setIncomingBlock(EpilogPN->getBasicBlockIndex(EpilogPreHeader),
                               NewExit);
    // Now PHIs should look like:
    // NewExit:
    //   PN = PHI [I, Latch], [undef, PreHeader]
    // ...
    // Exit:
    //   EpilogPN = PHI [PN, NewExit], [VMap[I], EpilogLatch]
  }

  // Create PHI nodes at NewExit (from the unrolling loop Latch and PreHeader).
  // Update corresponding PHI nodes in epilog loop.
  for (BasicBlock *Succ : successors(Latch)) {
    // Skip this as we already updated phis in exit blocks.
    if (!L->contains(Succ))
      continue;
    for (Instruction &BBI : *Succ) {
      PHINode *PN = dyn_cast<PHINode>(&BBI);
      // Exit when we passed all PHI nodes.
      if (!PN)
        break;
      // Add new PHI nodes to the loop exit block and update epilog
      // PHIs with the new PHI values.
      PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr",
                                       NewExit->getFirstNonPHI());
      // Adding a value to the new PHI node from the unrolling loop preheader.
      NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader);
      // Adding a value to the new PHI node from the unrolling loop latch.
      NewPN->addIncoming(PN->getIncomingValueForBlock(Latch), Latch);

      // Update the existing PHI node operand with the value from the new PHI
      // node.  Corresponding instruction in epilog loop should be PHI.
      PHINode *VPN = cast<PHINode>(VMap[&BBI]);
      VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN);
    }
  }

  Instruction *InsertPt = NewExit->getTerminator();
  IRBuilder<> B(InsertPt);
  Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
  assert(Exit && "Loop must have a single exit block only");
  // Split the epilogue exit to maintain loop canonicalization guarantees
  SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
  SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI,
                         PreserveLCSSA);
  // Add the branch to the exit block (around the unrolling loop)
  B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
  InsertPt->eraseFromParent();
  if (DT)
    DT->changeImmediateDominator(Exit, NewExit);

  // Split the main loop exit to maintain canonicalization guarantees.
  SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
  SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI,
                         PreserveLCSSA);
}
Пример #25
0
/// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present
/// into calls to the appropriate _Unwind_Resume function.
bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
    bool UsesNewEH = false;
    SmallVector<ResumeInst*, 16> Resumes;
    for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
        TerminatorInst *TI = I->getTerminator();
        if (ResumeInst *RI = dyn_cast<ResumeInst>(TI))
            Resumes.push_back(RI);
        else if (InvokeInst *II = dyn_cast<InvokeInst>(TI))
            UsesNewEH = II->getUnwindDest()->isLandingPad();
    }

    if (Resumes.empty())
        return UsesNewEH;

    // Find the rewind function if we didn't already.
    if (!RewindFunction) {
        LLVMContext &Ctx = Resumes[0]->getContext();
        FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
                                              Type::getInt8PtrTy(Ctx), false);
        const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
        RewindFunction = Fn.getParent()->getOrInsertFunction(RewindName, FTy);
    }

    // Create the basic block where the _Unwind_Resume call will live.
    LLVMContext &Ctx = Fn.getContext();
    unsigned ResumesSize = Resumes.size();

    if (ResumesSize == 1) {
        // Instead of creating a new BB and PHI node, just append the call to
        // _Unwind_Resume to the end of the single resume block.
        ResumeInst *RI = Resumes.front();
        BasicBlock *UnwindBB = RI->getParent();
        Value *ExnObj = GetExceptionObject(RI);

        // Call the _Unwind_Resume function.
        CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB);
        CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));

        // We never expect _Unwind_Resume to return.
        new UnreachableInst(Ctx, UnwindBB);
        return true;
    }

    BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn);
    PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesSize,
                                  "exn.obj", UnwindBB);

    // Extract the exception object from the ResumeInst and add it to the PHI node
    // that feeds the _Unwind_Resume call.
    for (SmallVectorImpl<ResumeInst*>::iterator
            I = Resumes.begin(), E = Resumes.end(); I != E; ++I) {
        ResumeInst *RI = *I;
        BasicBlock *Parent = RI->getParent();
        BranchInst::Create(UnwindBB, Parent);

        Value *ExnObj = GetExceptionObject(RI);
        PN->addIncoming(ExnObj, Parent);

        ++NumResumesLowered;
    }

    // Call the function.
    CallInst *CI = CallInst::Create(RewindFunction, PN, "", UnwindBB);
    CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));

    // We never expect _Unwind_Resume to return.
    new UnreachableInst(Ctx, UnwindBB);
    return true;
}
Пример #26
0
/// Create a clone of the blocks in a loop and connect them together.
/// If CreateRemainderLoop is false, loop structure will not be cloned,
/// otherwise a new loop will be created including all cloned blocks, and the
/// iterator of it switches to count NewIter down to 0.
/// The cloned blocks should be inserted between InsertTop and InsertBot.
/// If loop structure is cloned InsertTop should be new preheader, InsertBot
/// new loop exit.
/// Return the new cloned loop that is created when CreateRemainderLoop is true.
static Loop *
CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
                const bool UseEpilogRemainder, const bool UnrollRemainder,
                BasicBlock *InsertTop,
                BasicBlock *InsertBot, BasicBlock *Preheader,
                std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
                ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) {
  StringRef suffix = UseEpilogRemainder ? "epil" : "prol";
  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
  Function *F = Header->getParent();
  LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
  LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
  Loop *ParentLoop = L->getParentLoop();
  NewLoopsMap NewLoops;
  NewLoops[ParentLoop] = ParentLoop;
  if (!CreateRemainderLoop)
    NewLoops[L] = ParentLoop;

  // For each block in the original loop, create a new copy,
  // and update the value map with the newly created values.
  for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
    BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
    NewBlocks.push_back(NewBB);

    // If we're unrolling the outermost loop, there's no remainder loop,
    // and this block isn't in a nested loop, then the new block is not
    // in any loop. Otherwise, add it to loopinfo.
    if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop)
      addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);

    VMap[*BB] = NewBB;
    if (Header == *BB) {
      // For the first block, add a CFG connection to this newly
      // created block.
      InsertTop->getTerminator()->setSuccessor(0, NewBB);
    }

    if (DT) {
      if (Header == *BB) {
        // The header is dominated by the preheader.
        DT->addNewBlock(NewBB, InsertTop);
      } else {
        // Copy information from original loop to unrolled loop.
        BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock();
        DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDomBB]));
      }
    }

    if (Latch == *BB) {
      // For the last block, if CreateRemainderLoop is false, create a direct
      // jump to InsertBot. If not, create a loop back to cloned head.
      VMap.erase((*BB)->getTerminator());
      BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
      BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
      IRBuilder<> Builder(LatchBR);
      if (!CreateRemainderLoop) {
        Builder.CreateBr(InsertBot);
      } else {
        PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
                                          suffix + ".iter",
                                          FirstLoopBB->getFirstNonPHI());
        Value *IdxSub =
            Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
                              NewIdx->getName() + ".sub");
        Value *IdxCmp =
            Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
        Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
        NewIdx->addIncoming(NewIter, InsertTop);
        NewIdx->addIncoming(IdxSub, NewBB);
      }
      LatchBR->eraseFromParent();
    }
  }

  // Change the incoming values to the ones defined in the preheader or
  // cloned loop.
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
    if (!CreateRemainderLoop) {
      if (UseEpilogRemainder) {
        unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
        NewPHI->setIncomingBlock(idx, InsertTop);
        NewPHI->removeIncomingValue(Latch, false);
      } else {
        VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
        cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
      }
    } else {
      unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
      NewPHI->setIncomingBlock(idx, InsertTop);
      BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
      idx = NewPHI->getBasicBlockIndex(Latch);
      Value *InVal = NewPHI->getIncomingValue(idx);
      NewPHI->setIncomingBlock(idx, NewLatch);
      if (Value *V = VMap.lookup(InVal))
        NewPHI->setIncomingValue(idx, V);
    }
  }
  if (CreateRemainderLoop) {
    Loop *NewLoop = NewLoops[L];
    assert(NewLoop && "L should have been cloned");

    // Only add loop metadata if the loop is not going to be completely
    // unrolled.
    if (UnrollRemainder)
      return NewLoop;

    // Add unroll disable metadata to disable future unrolling for this loop.
    SmallVector<Metadata *, 4> MDs;
    // Reserve first location for self reference to the LoopID metadata node.
    MDs.push_back(nullptr);
    MDNode *LoopID = NewLoop->getLoopID();
    if (LoopID) {
      // First remove any existing loop unrolling metadata.
      for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
        bool IsUnrollMetadata = false;
        MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
        if (MD) {
          const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
          IsUnrollMetadata = S && S->getString().startswith("llvm.loop.unroll.");
        }
        if (!IsUnrollMetadata)
          MDs.push_back(LoopID->getOperand(i));
      }
    }

    LLVMContext &Context = NewLoop->getHeader()->getContext();
    SmallVector<Metadata *, 1> DisableOperands;
    DisableOperands.push_back(MDString::get(Context,
                                            "llvm.loop.unroll.disable"));
    MDNode *DisableNode = MDNode::get(Context, DisableOperands);
    MDs.push_back(DisableNode);

    MDNode *NewLoopID = MDNode::get(Context, MDs);
    // Set operand 0 to refer to the loop id itself.
    NewLoopID->replaceOperandWith(0, NewLoopID);
    NewLoop->setLoopID(NewLoopID);
    return NewLoop;
  }
  else
    return nullptr;
}
Пример #27
0
/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi
/// node) return blocks, merge them together to promote recursive block merging.
static bool mergeEmptyReturnBlocks(Function &F) {
  bool Changed = false;

  BasicBlock *RetBlock = 0;

  // Scan all the blocks in the function, looking for empty return blocks.
  for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) {
    BasicBlock &BB = *BBI++;

    // Only look at return blocks.
    ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
    if (Ret == 0) continue;

    // Only look at the block if it is empty or the only other thing in it is a
    // single PHI node that is the operand to the return.
    if (Ret != &BB.front()) {
      // Check for something else in the block.
      BasicBlock::iterator I = Ret;
      --I;
      // Skip over debug info.
      while (isa<DbgInfoIntrinsic>(I) && I != BB.begin())
        --I;
      if (!isa<DbgInfoIntrinsic>(I) &&
          (!isa<PHINode>(I) || I != BB.begin() ||
           Ret->getNumOperands() == 0 ||
           Ret->getOperand(0) != I))
        continue;
    }

    // If this is the first returning block, remember it and keep going.
    if (RetBlock == 0) {
      RetBlock = &BB;
      continue;
    }

    // Otherwise, we found a duplicate return block.  Merge the two.
    Changed = true;

    // Case when there is no input to the return or when the returned values
    // agree is trivial.  Note that they can't agree if there are phis in the
    // blocks.
    if (Ret->getNumOperands() == 0 ||
        Ret->getOperand(0) ==
          cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0)) {
      BB.replaceAllUsesWith(RetBlock);
      BB.eraseFromParent();
      continue;
    }

    // If the canonical return block has no PHI node, create one now.
    PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin());
    if (RetBlockPHI == 0) {
      Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0);
      pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock);
      RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(),
                                    std::distance(PB, PE), "merge",
                                    &RetBlock->front());

      for (pred_iterator PI = PB; PI != PE; ++PI)
        RetBlockPHI->addIncoming(InVal, *PI);
      RetBlock->getTerminator()->setOperand(0, RetBlockPHI);
    }

    // Turn BB into a block that just unconditionally branches to the return
    // block.  This handles the case when the two return blocks have a common
    // predecessor but that return different things.
    RetBlockPHI->addIncoming(Ret->getOperand(0), &BB);
    BB.getTerminator()->eraseFromParent();
    BranchInst::Create(RetBlock, &BB);
  }

  return Changed;
}
Пример #28
0
bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
                                      bool AllowExpensiveTripCount,
                                      bool UseEpilogRemainder,
                                      bool UnrollRemainder,
                                      LoopInfo *LI, ScalarEvolution *SE,
                                      DominatorTree *DT, AssumptionCache *AC,
                                      OptimizationRemarkEmitter *ORE,
                                      bool PreserveLCSSA) {
  DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n");
  DEBUG(L->dump());
  DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" :
        dbgs() << "Using prolog remainder.\n");

  // Make sure the loop is in canonical form.
  if (!L->isLoopSimplifyForm()) {
    DEBUG(dbgs() << "Not in simplify form!\n");
    return false;
  }

  // Guaranteed by LoopSimplifyForm.
  BasicBlock *Latch = L->getLoopLatch();
  BasicBlock *Header = L->getHeader();

  BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
  unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0;
  BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex);
  // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the
  // targets of the Latch be an exit block out of the loop. This needs
  // to be guaranteed by the callers of UnrollRuntimeLoopRemainder.
  assert(!L->contains(LatchExit) &&
         "one of the loop latch successors should be the exit block!");
  // These are exit blocks other than the target of the latch exiting block.
  SmallVector<BasicBlock *, 4> OtherExits;
  bool isMultiExitUnrollingEnabled =
      canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
                                   UseEpilogRemainder) &&
      canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
                                       UseEpilogRemainder);
  // Support only single exit and exiting block unless multi-exit loop unrolling is enabled.
  if (!isMultiExitUnrollingEnabled &&
      (!L->getExitingBlock() || OtherExits.size())) {
    DEBUG(
        dbgs()
        << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
           "enabled!\n");
    return false;
  }
  // Use Scalar Evolution to compute the trip count. This allows more loops to
  // be unrolled than relying on induction var simplification.
  if (!SE)
    return false;

  // Only unroll loops with a computable trip count, and the trip count needs
  // to be an int value (allowing a pointer type is a TODO item).
  // We calculate the backedge count by using getExitCount on the Latch block,
  // which is proven to be the only exiting block in this loop. This is same as
  // calculating getBackedgeTakenCount on the loop (which computes SCEV for all
  // exiting blocks).
  const SCEV *BECountSC = SE->getExitCount(L, Latch);
  if (isa<SCEVCouldNotCompute>(BECountSC) ||
      !BECountSC->getType()->isIntegerTy()) {
    DEBUG(dbgs() << "Could not compute exit block SCEV\n");
    return false;
  }

  unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();

  // Add 1 since the backedge count doesn't include the first loop iteration.
  const SCEV *TripCountSC =
      SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
  if (isa<SCEVCouldNotCompute>(TripCountSC)) {
    DEBUG(dbgs() << "Could not compute trip count SCEV.\n");
    return false;
  }

  BasicBlock *PreHeader = L->getLoopPreheader();
  BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
  const DataLayout &DL = Header->getModule()->getDataLayout();
  SCEVExpander Expander(*SE, DL, "loop-unroll");
  if (!AllowExpensiveTripCount &&
      Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) {
    DEBUG(dbgs() << "High cost for expanding trip count scev!\n");
    return false;
  }

  // This constraint lets us deal with an overflowing trip count easily; see the
  // comment on ModVal below.
  if (Log2_32(Count) > BEWidth) {
    DEBUG(dbgs()
          << "Count failed constraint on overflow trip count calculation.\n");
    return false;
  }

  // Loop structure is the following:
  //
  // PreHeader
  //   Header
  //   ...
  //   Latch
  // LatchExit

  BasicBlock *NewPreHeader;
  BasicBlock *NewExit = nullptr;
  BasicBlock *PrologExit = nullptr;
  BasicBlock *EpilogPreHeader = nullptr;
  BasicBlock *PrologPreHeader = nullptr;

  if (UseEpilogRemainder) {
    // If epilog remainder
    // Split PreHeader to insert a branch around loop for unrolling.
    NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
    NewPreHeader->setName(PreHeader->getName() + ".new");
    // Split LatchExit to create phi nodes from branch above.
    SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
    NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa",
                                     DT, LI, PreserveLCSSA);
    // Split NewExit to insert epilog remainder loop.
    EpilogPreHeader = SplitBlock(NewExit, NewExit->getTerminator(), DT, LI);
    EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
  } else {
    // If prolog remainder
    // Split the original preheader twice to insert prolog remainder loop
    PrologPreHeader = SplitEdge(PreHeader, Header, DT, LI);
    PrologPreHeader->setName(Header->getName() + ".prol.preheader");
    PrologExit = SplitBlock(PrologPreHeader, PrologPreHeader->getTerminator(),
                            DT, LI);
    PrologExit->setName(Header->getName() + ".prol.loopexit");
    // Split PrologExit to get NewPreHeader.
    NewPreHeader = SplitBlock(PrologExit, PrologExit->getTerminator(), DT, LI);
    NewPreHeader->setName(PreHeader->getName() + ".new");
  }
  // Loop structure should be the following:
  //  Epilog             Prolog
  //
  // PreHeader         PreHeader
  // *NewPreHeader     *PrologPreHeader
  //   Header          *PrologExit
  //   ...             *NewPreHeader
  //   Latch             Header
  // *NewExit            ...
  // *EpilogPreHeader    Latch
  // LatchExit              LatchExit

  // Calculate conditions for branch around loop for unrolling
  // in epilog case and around prolog remainder loop in prolog case.
  // Compute the number of extra iterations required, which is:
  //  extra iterations = run-time trip count % loop unroll factor
  PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
  Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
                                            PreHeaderBR);
  Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
                                          PreHeaderBR);
  IRBuilder<> B(PreHeaderBR);
  Value *ModVal;
  // Calculate ModVal = (BECount + 1) % Count.
  // Note that TripCount is BECount + 1.
  if (isPowerOf2_32(Count)) {
    // When Count is power of 2 we don't BECount for epilog case, however we'll
    // need it for a branch around unrolling loop for prolog case.
    ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
    //  1. There are no iterations to be run in the prolog/epilog loop.
    // OR
    //  2. The addition computing TripCount overflowed.
    //
    // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
    // the number of iterations that remain to be run in the original loop is a
    // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
    // explicitly check this above).
  } else {
    // As (BECount + 1) can potentially unsigned overflow we count
    // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
    Value *ModValTmp = B.CreateURem(BECount,
                                    ConstantInt::get(BECount->getType(),
                                                     Count));
    Value *ModValAdd = B.CreateAdd(ModValTmp,
                                   ConstantInt::get(ModValTmp->getType(), 1));
    // At that point (BECount % Count) + 1 could be equal to Count.
    // To handle this case we need to take mod by Count one more time.
    ModVal = B.CreateURem(ModValAdd,
                          ConstantInt::get(BECount->getType(), Count),
                          "xtraiter");
  }
  Value *BranchVal =
      UseEpilogRemainder ? B.CreateICmpULT(BECount,
                                           ConstantInt::get(BECount->getType(),
                                                            Count - 1)) :
                           B.CreateIsNotNull(ModVal, "lcmp.mod");
  BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
  BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
  // Branch to either remainder (extra iterations) loop or unrolling loop.
  B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
  PreHeaderBR->eraseFromParent();
  if (DT) {
    if (UseEpilogRemainder)
      DT->changeImmediateDominator(NewExit, PreHeader);
    else
      DT->changeImmediateDominator(PrologExit, PreHeader);
  }
  Function *F = Header->getParent();
  // Get an ordered list of blocks in the loop to help with the ordering of the
  // cloned blocks in the prolog/epilog code
  LoopBlocksDFS LoopBlocks(L);
  LoopBlocks.perform(LI);

  //
  // For each extra loop iteration, create a copy of the loop's basic blocks
  // and generate a condition that branches to the copy depending on the
  // number of 'left over' iterations.
  //
  std::vector<BasicBlock *> NewBlocks;
  ValueToValueMapTy VMap;

  // For unroll factor 2 remainder loop will have 1 iterations.
  // Do not create 1 iteration loop.
  bool CreateRemainderLoop = (Count != 2);

  // Clone all the basic blocks in the loop. If Count is 2, we don't clone
  // the loop, otherwise we create a cloned loop to execute the extra
  // iterations. This function adds the appropriate CFG connections.
  BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
  BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
  Loop *remainderLoop = CloneLoopBlocks(
      L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
      InsertTop, InsertBot,
      NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);

  // Insert the cloned blocks into the function.
  F->getBasicBlockList().splice(InsertBot->getIterator(),
                                F->getBasicBlockList(),
                                NewBlocks[0]->getIterator(),
                                F->end());

  // Now the loop blocks are cloned and the other exiting blocks from the
  // remainder are connected to the original Loop's exit blocks. The remaining
  // work is to update the phi nodes in the original loop, and take in the
  // values from the cloned region. Also update the dominator info for
  // OtherExits and their immediate successors, since we have new edges into
  // OtherExits.
  SmallSet<BasicBlock*, 8> ImmediateSuccessorsOfExitBlocks;
  for (auto *BB : OtherExits) {
   for (auto &II : *BB) {

     // Given we preserve LCSSA form, we know that the values used outside the
     // loop will be used through these phi nodes at the exit blocks that are
     // transformed below.
     if (!isa<PHINode>(II))
       break;
     PHINode *Phi = cast<PHINode>(&II);
     unsigned oldNumOperands = Phi->getNumIncomingValues();
     // Add the incoming values from the remainder code to the end of the phi
     // node.
     for (unsigned i =0; i < oldNumOperands; i++){
       Value *newVal = VMap[Phi->getIncomingValue(i)];
       // newVal can be a constant or derived from values outside the loop, and
       // hence need not have a VMap value.
       if (!newVal)
         newVal = Phi->getIncomingValue(i);
       Phi->addIncoming(newVal,
                           cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
     }
   }
#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
    for (BasicBlock *SuccBB : successors(BB)) {
      assert(!(any_of(OtherExits,
                      [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) ||
               SuccBB == LatchExit) &&
             "Breaks the definition of dedicated exits!");
    }
#endif
   // Update the dominator info because the immediate dominator is no longer the
   // header of the original Loop. BB has edges both from L and remainder code.
   // Since the preheader determines which loop is run (L or directly jump to
   // the remainder code), we set the immediate dominator as the preheader.
   if (DT) {
     DT->changeImmediateDominator(BB, PreHeader);
     // Also update the IDom for immediate successors of BB.  If the current
     // IDom is the header, update the IDom to be the preheader because that is
     // the nearest common dominator of all predecessors of SuccBB.  We need to
     // check for IDom being the header because successors of exit blocks can
     // have edges from outside the loop, and we should not incorrectly update
     // the IDom in that case.
     for (BasicBlock *SuccBB: successors(BB))
       if (ImmediateSuccessorsOfExitBlocks.insert(SuccBB).second) {
         if (DT->getNode(SuccBB)->getIDom()->getBlock() == Header) {
           assert(!SuccBB->getSinglePredecessor() &&
                  "BB should be the IDom then!");
           DT->changeImmediateDominator(SuccBB, PreHeader);
         }
       }
    }
  }

  // Loop structure should be the following:
  //  Epilog             Prolog
  //
  // PreHeader         PreHeader
  // NewPreHeader      PrologPreHeader
  //   Header            PrologHeader
  //   ...               ...
  //   Latch             PrologLatch
  // NewExit           PrologExit
  // EpilogPreHeader   NewPreHeader
  //   EpilogHeader      Header
  //   ...               ...
  //   EpilogLatch       Latch
  // LatchExit              LatchExit

  // Rewrite the cloned instruction operands to use the values created when the
  // clone is created.
  for (BasicBlock *BB : NewBlocks) {
    for (Instruction &I : *BB) {
      RemapInstruction(&I, VMap,
                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
    }
  }

  if (UseEpilogRemainder) {
    // Connect the epilog code to the original loop and update the
    // PHI functions.
    ConnectEpilog(L, ModVal, NewExit, LatchExit, PreHeader,
                  EpilogPreHeader, NewPreHeader, VMap, DT, LI,
                  PreserveLCSSA);

    // Update counter in loop for unrolling.
    // I should be multiply of Count.
    IRBuilder<> B2(NewPreHeader->getTerminator());
    Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
    BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
    B2.SetInsertPoint(LatchBR);
    PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
                                      Header->getFirstNonPHI());
    Value *IdxSub =
        B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
                     NewIdx->getName() + ".nsub");
    Value *IdxCmp;
    if (LatchBR->getSuccessor(0) == Header)
      IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
    else
      IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
    NewIdx->addIncoming(TestVal, NewPreHeader);
    NewIdx->addIncoming(IdxSub, Latch);
    LatchBR->setCondition(IdxCmp);
  } else {
    // Connect the prolog code to the original loop and update the
    // PHI functions.
    ConnectProlog(L, BECount, Count, PrologExit, LatchExit, PreHeader,
                  NewPreHeader, VMap, DT, LI, PreserveLCSSA);
  }

  // If this loop is nested, then the loop unroller changes the code in the
  // parent loop, so the Scalar Evolution pass needs to be run again.
  if (Loop *ParentLoop = L->getParentLoop())
    SE->forgetLoop(ParentLoop);

  // Canonicalize to LoopSimplifyForm both original and remainder loops. We
  // cannot rely on the LoopUnrollPass to do this because it only does
  // canonicalization for parent/subloops and not the sibling loops.
  if (OtherExits.size() > 0) {
    // Generate dedicated exit blocks for the original loop, to preserve
    // LoopSimplifyForm.
    formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA);
    // Generate dedicated exit blocks for the remainder loop if one exists, to
    // preserve LoopSimplifyForm.
    if (remainderLoop)
      formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA);
  }

  if (remainderLoop && UnrollRemainder) {
    DEBUG(dbgs() << "Unrolling remainder loop\n");
    UnrollLoop(remainderLoop, /*Count*/Count - 1, /*TripCount*/Count - 1,
               /*Force*/false, /*AllowRuntime*/false,
               /*AllowExpensiveTripCount*/false, /*PreserveCondBr*/true,
               /*PreserveOnlyFirst*/false, /*TripMultiple*/1,
               /*PeelCount*/0, /*UnrollRemainder*/false, LI, SE, DT, AC, ORE,
               PreserveLCSSA);
  }

  NumRuntimeUnrolled++;
  return true;
}
Пример #29
0
void PromoteMem2Reg::run() {
  Function &F = *DT.getRoot()->getParent();

  if (AST)
    PointerAllocaValues.resize(Allocas.size());
  AllocaDbgDeclares.resize(Allocas.size());

  AllocaInfo Info;
  LargeBlockInfo LBI;

  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function, which is same as DF!");

    removeLifetimeIntrinsicUsers(AI);

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      if (AST)
        AST->deleteValue(AI);
      AI->eraseFromParent();

      // Remove the alloca from the Allocas list, since it has been processed
      RemoveFromAllocasList(AllocaNum);
      ++NumDeadAlloca;
      continue;
    }

    // Calculate the set of read and write-locations for each alloca.  This is
    // analogous to finding the 'uses' and 'definitions' of each variable.
    Info.AnalyzeAlloca(AI);

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);
        ++NumSingleStore;
        continue;
      }
    }

    // If the alloca is only read and written in one basic block, just perform a
    // linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock) {
      promoteSingleBlockAlloca(AI, Info, LBI, AST);

      // The alloca has been processed, move on.
      RemoveFromAllocasList(AllocaNum);
      continue;
    }

    // If we haven't computed dominator tree levels, do so now.
    if (DomLevels.empty()) {
      SmallVector<DomTreeNode *, 32> Worklist;

      DomTreeNode *Root = DT.getRootNode();
      DomLevels[Root] = 0;
      Worklist.push_back(Root);

      while (!Worklist.empty()) {
        DomTreeNode *Node = Worklist.pop_back_val();
        unsigned ChildLevel = DomLevels[Node] + 1;
        for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
             CI != CE; ++CI) {
          DomLevels[*CI] = ChildLevel;
          Worklist.push_back(*CI);
        }
      }
    }

    // If we haven't computed a numbering for the BB's in the function, do so
    // now.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
        BBNumbers[I] = ID++;
    }

    // If we have an AST to keep updated, remember some pointer value that is
    // stored into the alloca.
    if (AST)
      PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;

    // Remember the dbg.declare intrinsic describing this alloca, if any.
    if (Info.DbgDeclare)
      AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;

    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // At this point, we're committed to promoting the alloca using IDF's, and
    // the standard SSA construction algorithm.  Determine which blocks need PHI
    // nodes and see if we can optimize out some work by avoiding insertion of
    // dead phi nodes.
    DetermineInsertionPoint(AI, AllocaNum, Info);
  }

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  LBI.clear();

  // Set the incoming values for the basic block to be null values for all of
  // the alloca's.  We do this in case there is a load of a value that has not
  // been stored yet.  In this case, it will get this null value.
  //
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());

  // Walks all basic blocks in the function performing the SSA rename algorithm
  // and inserting the phi nodes we marked as necessary
  //
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
  do {
    RenamePassData RPD;
    RPD.swap(RenamePassWorkList.back());
    RenamePassWorkList.pop_back();
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
  } while (!RenamePassWorkList.empty());

  // The renamer uses the Visited set to avoid infinite loops.  Clear it now.
  Visited.clear();

  // Remove the allocas themselves from the function.
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
    Instruction *A = Allocas[i];

    // If there are any uses of the alloca instructions left, they must be in
    // unreachable basic blocks that were not processed by walking the dominator
    // tree. Just delete the users now.
    if (!A->use_empty())
      A->replaceAllUsesWith(UndefValue::get(A->getType()));
    if (AST)
      AST->deleteValue(A);
    A->eraseFromParent();
  }

  // Remove alloca's dbg.declare instrinsics from the function.
  for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
    if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
      DDI->eraseFromParent();

  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values.  This can
  // happen due to undef values coming into the PHI nodes.  This process is
  // iterative, because eliminating one PHI node can cause others to be removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;

    // Iterating over NewPhiNodes is deterministic, so it is safe to try to
    // simplify and RAUW them as we go.  If it was not, we could add uses to
    // the values we replace with in a non-deterministic order, thus creating
    // non-deterministic def->use chains.
    for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
             I = NewPhiNodes.begin(),
             E = NewPhiNodes.end();
         I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
        if (AST && PN->getType()->isPointerTy())
          AST->deleteValue(PN);
        PN->replaceAllUsesWith(V);
        PN->eraseFromParent();
        NewPhiNodes.erase(I++);
        EliminatedAPHI = true;
        continue;
      }
      ++I;
    }
  }

  // At this point, the renamer has added entries to PHI nodes for all reachable
  // code.  Unfortunately, there may be unreachable blocks which the renamer
  // hasn't traversed.  If this is the case, the PHI nodes may not
  // have incoming values for all predecessors.  Loop over all PHI nodes we have
  // created, inserting undef values if they are missing any incoming values.
  //
  for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
           I = NewPhiNodes.begin(),
           E = NewPhiNodes.end();
       I != E; ++I) {
    // We want to do this once per basic block.  As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if there the PHI nodes are missing incoming values.  We
    // know that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks.  Start by sorting the incoming predecessors for efficient
    // access.
    std::sort(Preds.begin(), Preds.end());

    // Now we loop through all BB's which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVectorImpl<BasicBlock *>::iterator EntIt = std::lower_bound(
          Preds.begin(), Preds.end(), SomePHI->getIncomingBlock(i));
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the preds list must have dummy
    // entries inserted into every PHI nodes for the block.  Update all the phi
    // nodes in this block that we are inserting (there could be phis before
    // mem2reg runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
        SomePHI->addIncoming(UndefVal, Preds[pred]);
    }
  }

  NewPhiNodes.clear();
}
Пример #30
0
/// Lower a llvm.bitset.test call to its implementation. Returns the value to
/// replace the call with.
Value *LowerBitSets::lowerBitSetCall(
    CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
    Constant *CombinedGlobalIntAddr,
    const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
  Value *Ptr = CI->getArgOperand(0);
  const DataLayout &DL = M->getDataLayout();

  if (BSI.containsValue(DL, GlobalLayout, Ptr))
    return ConstantInt::getTrue(M->getContext());

  Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
      CombinedGlobalIntAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset));

  BasicBlock *InitialBB = CI->getParent();

  IRBuilder<> B(CI);

  Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);

  if (BSI.isSingleOffset())
    return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);

  Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt);

  Value *BitOffset;
  if (BSI.AlignLog2 == 0) {
    BitOffset = PtrOffset;
  } else {
    // We need to check that the offset both falls within our range and is
    // suitably aligned. We can check both properties at the same time by
    // performing a right rotate by log2(alignment) followed by an integer
    // comparison against the bitset size. The rotate will move the lower
    // order bits that need to be zero into the higher order bits of the
    // result, causing the comparison to fail if they are nonzero. The rotate
    // also conveniently gives us a bit offset to use during the load from
    // the bitset.
    Value *OffsetSHR =
        B.CreateLShr(PtrOffset, ConstantInt::get(IntPtrTy, BSI.AlignLog2));
    Value *OffsetSHL = B.CreateShl(
        PtrOffset,
        ConstantInt::get(IntPtrTy, DL.getPointerSizeInBits(0) - BSI.AlignLog2));
    BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
  }

  Constant *BitSizeConst = ConstantInt::get(IntPtrTy, BSI.BitSize);
  Value *OffsetInRange = B.CreateICmpULT(BitOffset, BitSizeConst);

  // If the bit set is all ones, testing against it is unnecessary.
  if (BSI.isAllOnes())
    return OffsetInRange;

  TerminatorInst *Term = SplitBlockAndInsertIfThen(OffsetInRange, CI, false);
  IRBuilder<> ThenB(Term);

  // Now that we know that the offset is in range and aligned, load the
  // appropriate bit from the bitset.
  Value *Bit = createBitSetTest(ThenB, BSI, BAI, BitOffset);

  // The value we want is 0 if we came directly from the initial block
  // (having failed the range or alignment checks), or the loaded bit if
  // we came from the block in which we loaded it.
  B.SetInsertPoint(CI);
  PHINode *P = B.CreatePHI(Int1Ty, 2);
  P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);
  P->addIncoming(Bit, ThenB.GetInsertBlock());
  return P;
}