Ejemplo n.º 1
0
void doJumpTableViaSwitch(
        NativeModulePtr natM, 
        BasicBlock *& block, 
        InstPtr ip, 
        MCInst &inst)
{

    llvm::dbgs() << __FUNCTION__ << ": Doing jumpt table via switch\n";
    Function *F = block->getParent();
    Module *M = F->getParent();
    // we know this conforms to
    // jmp [reg*4+displacement]

    // sanity check
    const MCOperand& scale = OP(1);
    const MCOperand& index = OP(2);

    TASSERT(index.isReg(), "Conformant jump tables need index to be a register");
    TASSERT(scale.isImm() && scale.getImm() == 4, "Conformant jump tables have scale == 4");

    MCSJumpTablePtr jmpptr = ip->get_jump_table();

    // to ensure no negative entries
    Value *adjustment = CONST_V<32>(block, jmpptr->getInitialEntry());
    Value *reg_val = R_READ<32>(block, index.getReg());
    Value *real_index = 
        BinaryOperator::Create(Instruction::Add, adjustment, reg_val, "", block);
   
    // create a default block that just traps
    BasicBlock *defaultBlock = 
        BasicBlock::Create(block->getContext(), "", block->getParent(), 0);
    Function *trapFn = Intrinsic::getDeclaration(M, Intrinsic::trap);
    CallInst::Create(trapFn, "", defaultBlock);
    ReturnInst::Create(defaultBlock->getContext(), defaultBlock);
    // end default block

    const std::vector<VA> &jmpblocks = jmpptr->getJumpTable();

    // create a switch inst
    SwitchInst *theSwitch = SwitchInst::Create(
            real_index, 
            defaultBlock,
            jmpblocks.size(),
            block);

    // populate switch
    int myindex = 0;
    for(std::vector<VA>::const_iterator itr = jmpblocks.begin();
        itr != jmpblocks.end();
        itr++) 
    {
        std::string  bbname = "block_0x"+to_string<VA>(*itr, std::hex);
        BasicBlock *toBlock = bbFromStrName(bbname, F);
        TASSERT(toBlock != NULL, "Could not find block: "+bbname);
        theSwitch->addCase(CONST_V<32>(block, myindex), toBlock);
        ++myindex;
    }

}
static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
                         BasicBlock &CurrBB, Function::iterator &BB,
                         const TargetTransformInfo *TTI) {
  // There is no need to change the IR, since backend will emit sqrt
  // instruction if the call has already been marked read-only.
  if (Call->onlyReadsMemory())
    return false;

  if (!DebugCounter::shouldExecute(PILCounter))
    return false;

  // Do the following transformation:
  //
  // (before)
  // dst = sqrt(src)
  //
  // (after)
  // v0 = sqrt_noreadmem(src) # native sqrt instruction.
  // [if (v0 is a NaN) || if (src < 0)]
  //   v1 = sqrt(src)         # library call.
  // dst = phi(v0, v1)
  //

  // Move all instructions following Call to newly created block JoinBB.
  // Create phi and replace all uses.
  BasicBlock *JoinBB = llvm::SplitBlock(&CurrBB, Call->getNextNode());
  IRBuilder<> Builder(JoinBB, JoinBB->begin());
  Type *Ty = Call->getType();
  PHINode *Phi = Builder.CreatePHI(Ty, 2);
  Call->replaceAllUsesWith(Phi);

  // Create basic block LibCallBB and insert a call to library function sqrt.
  BasicBlock *LibCallBB = BasicBlock::Create(CurrBB.getContext(), "call.sqrt",
                                             CurrBB.getParent(), JoinBB);
  Builder.SetInsertPoint(LibCallBB);
  Instruction *LibCall = Call->clone();
  Builder.Insert(LibCall);
  Builder.CreateBr(JoinBB);

  // Add attribute "readnone" so that backend can use a native sqrt instruction
  // for this call. Insert a FP compare instruction and a conditional branch
  // at the end of CurrBB.
  Call->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
  CurrBB.getTerminator()->eraseFromParent();
  Builder.SetInsertPoint(&CurrBB);
  Value *FCmp = TTI->isFCmpOrdCheaperThanFCmpZero(Ty)
                    ? Builder.CreateFCmpORD(Call, Call)
                    : Builder.CreateFCmpOGE(Call->getOperand(0),
                                            ConstantFP::get(Ty, 0.0));
  Builder.CreateCondBr(FCmp, JoinBB, LibCallBB);

  // Add phi operands.
  Phi->addIncoming(Call, &CurrBB);
  Phi->addIncoming(LibCall, LibCallBB);

  BB = JoinBB->getIterator();
  return true;
}
Ejemplo n.º 3
0
/// IsTrivialUnswitchCondition - Check to see if this unswitch condition is
/// trivial: that is, that the condition controls whether or not the loop does
/// anything at all.  If this is a trivial condition, unswitching produces no
/// code duplications (equivalently, it produces a simpler loop and a new empty
/// loop, which gets deleted).
///
/// If this is a trivial condition, return true, otherwise return false.  When
/// returning true, this sets Cond and Val to the condition that controls the
/// trivial condition: when Cond dynamically equals Val, the loop is known to
/// exit.  Finally, this sets LoopExit to the BB that the loop exits to when
/// Cond == Val.
///
bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
                                       BasicBlock **LoopExit) {
  BasicBlock *Header = currentLoop->getHeader();
  TerminatorInst *HeaderTerm = Header->getTerminator();
  LLVMContext &Context = Header->getContext();
  
  BasicBlock *LoopExitBB = 0;
  if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
    // If the header block doesn't end with a conditional branch on Cond, we
    // can't handle it.
    if (!BI->isConditional() || BI->getCondition() != Cond)
      return false;
  
    // Check to see if a successor of the branch is guaranteed to 
    // exit through a unique exit block without having any 
    // side-effects.  If so, determine the value of Cond that causes it to do
    // this.
    if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, 
                                             BI->getSuccessor(0)))) {
      if (Val) *Val = ConstantInt::getTrue(Context);
    } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, 
                                                    BI->getSuccessor(1)))) {
      if (Val) *Val = ConstantInt::getFalse(Context);
    }
  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) {
    // If this isn't a switch on Cond, we can't handle it.
    if (SI->getCondition() != Cond) return false;
    
    // Check to see if a successor of the switch is guaranteed to go to the
    // latch block or exit through a one exit block without having any 
    // side-effects.  If so, determine the value of Cond that causes it to do
    // this.  Note that we can't trivially unswitch on the default case.
    for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
      if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, 
                                               SI->getSuccessor(i)))) {
        // Okay, we found a trivial case, remember the value that is trivial.
        if (Val) *Val = SI->getCaseValue(i);
        break;
      }
  }

  // If we didn't find a single unique LoopExit block, or if the loop exit block
  // contains phi nodes, this isn't trivial.
  if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
    return false;   // Can't handle this.
  
  if (LoopExit) *LoopExit = LoopExitBB;
  
  // We already know that nothing uses any scalar values defined inside of this
  // loop.  As such, we just have to check to see if this loop will execute any
  // side-effecting instructions (e.g. stores, calls, volatile loads) in the
  // part of the loop that the code *would* execute.  We already checked the
  // tail, check the header now.
  for (BasicBlock::iterator I = Header->begin(), E = Header->end(); I != E; ++I)
    if (I->mayHaveSideEffects())
      return false;
  return true;
}
Ejemplo n.º 4
0
void GEOSProfiler::instrumentBasicBlock(BasicBlock  &BB, Module &M, int ID) {
  IRBuilder<> Builder(BB.getContext());

  auto ConstantId = ConstantInt::get(Builder.getInt32Ty(), ID);
  CallInst *CountCall = Builder.CreateCall(
      M.getFunction("count"), ConstantId);

  CountCall->insertBefore(BB.getFirstNonPHI());
}
Ejemplo n.º 5
0
void AliasCheckerInliner::inlineReportOrSilenceIfMissed(CallInst *CI) {
  BasicBlock *BB = CI->getParent();
  CallSite CS(CI);
  Value *P = CS.getArgument(0), *Q = CS.getArgument(2);
  Value *VIDOfP = CS.getArgument(1), *VIDOfQ = CS.getArgument(3);
  // BB:
  //   xxx
  //   call ReportIfMissed(P, VID(P), Q, VID(Q))
  //   yyy
  //
  // =>
  //
  // BB:
  //   xxx
  //   br NewBB
  // NewBB:
  //   call ReportIfMissed(P, VID(P), Q, VID(Q))
  //   yyy
  //
  // =>
  //
  // BB:
  //   xxx
  //   C1 = icmp eq P, Q
  //   C2 = icmp ne P, null
  //   C3 = and C1, C2
  //   br C3, ReportBB, NewBB
  // NewBB:
  //   yyy
  // ReportBB:
  //   ReportMissingAlias(VIDOfP, VIDOfQ, P)
  //   br NewBB

  // Create NewBB.
  BasicBlock *NewBB = BB->splitBasicBlock(CI, "bb");
  // Create ReportBB.
  BasicBlock *ReportBB = BasicBlock::Create(BB->getContext(),
                                            "report",
                                            BB->getParent());
  vector<Value *> Args;
  Args.push_back(VIDOfP);
  Args.push_back(VIDOfQ);
  Args.push_back(P);
  if (CI->getCalledFunction() == ReportIfMissed)
    CallInst::Create(ReportMissingAlias, Args, "", ReportBB);
  else if (CI->getCalledFunction() == SilenceIfMissed)
    CallInst::Create(SilenceMissingAlias, Args, "", ReportBB);
  else
    assert(false);
  BranchInst::Create(NewBB, ReportBB);
  // Update BB.
  insertBranchTo(BB, ReportBB, NewBB, P, Q);
  CI->eraseFromParent();
}
Ejemplo n.º 6
0
void ScopAnnotator::pushLoop(Loop *L, bool IsParallel) {

  ActiveLoops.push_back(L);
  if (!IsParallel)
    return;

  BasicBlock *Header = L->getHeader();
  MDNode *Id = getID(Header->getContext());
  assert(Id->getOperand(0) == Id && "Expected Id to be a self-reference");
  assert(Id->getNumOperands() == 1 && "Unexpected extra operands in Id");
  MDNode *Ids = ParallelLoops.empty()
                    ? Id
                    : MDNode::concatenate(ParallelLoops.back(), Id);
  ParallelLoops.push_back(Ids);
}
bool ExpandGetElementPtr::runOnBasicBlock(BasicBlock &BB) {
    bool Modified = false;
    DataLayout DL(BB.getParent()->getParent());
    Type *PtrType = DL.getIntPtrType(BB.getContext());

    for (BasicBlock::InstListType::iterator Iter = BB.begin();
            Iter != BB.end(); ) {
        Instruction *Inst = Iter++;
        if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
            Modified = true;
            ExpandGEP(GEP, &DL, PtrType);
        }
    }
    return Modified;
}
Ejemplo n.º 8
0
/// SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen,
/// but also creates the ElseBlock.
/// Before:
///   Head
///   SplitBefore
///   Tail
/// After:
///   Head
///   if (Cond)
///     ThenBlock
///   else
///     ElseBlock
///   SplitBefore
///   Tail
void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
                                         TerminatorInst **ThenTerm,
                                         TerminatorInst **ElseTerm,
                                         MDNode *BranchWeights) {
  BasicBlock *Head = SplitBefore->getParent();
  BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
  TerminatorInst *HeadOldTerm = Head->getTerminator();
  LLVMContext &C = Head->getContext();
  BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
  BasicBlock *ElseBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
  *ThenTerm = BranchInst::Create(Tail, ThenBlock);
  *ElseTerm = BranchInst::Create(Tail, ElseBlock);
  BranchInst *HeadNewTerm =
    BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/ElseBlock, Cond);
  HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
  ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
}
Ejemplo n.º 9
0
TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp,
    bool Unreachable, MDNode *BranchWeights) {
  Instruction *SplitBefore = Cmp->getNextNode();
  BasicBlock *Head = SplitBefore->getParent();
  BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
  TerminatorInst *HeadOldTerm = Head->getTerminator();
  LLVMContext &C = Head->getContext();
  BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
  TerminatorInst *CheckTerm;
  if (Unreachable)
    CheckTerm = new UnreachableInst(C, ThenBlock);
  else
    CheckTerm = BranchInst::Create(Tail, ThenBlock);
  BranchInst *HeadNewTerm =
    BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp);
  HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
  ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
  return CheckTerm;
}
Ejemplo n.º 10
0
void MemoryInstrumenter::instrumentMemoryAllocation(Value *Start,
                                                    Value *Size,
                                                    Value *Success,
                                                    Instruction *Loc) {
  IDAssigner &IDA = getAnalysis<IDAssigner>();

  assert(Start->getType()->isPointerTy());
  assert(Size);
  // The size argument to HookMemAlloc must be long.
  assert(Size->getType() == LongType);
  assert(Success == NULL || Success->getType()->isIntegerTy(1));
  assert(Loc);

  vector<Value *> Args;
  // Arg 1: value ID
  Args.push_back(ConstantInt::get(IntType, IDA.getValueID(Start)));
  // Arg 2: starting address
  if (Start->getType() != CharStarType) {
    Start = new BitCastInst(Start, CharStarType, "", Loc);
  }
  Args.push_back(Start);
  // Arg 3: bound
  Args.push_back(Size);

  // If the allocation is always successful, such as new, we create the
  // memory allocation hook directly; otherwise, we need to check the condition
  // and add the memory allocation hook.
  if (Success == NULL) {
    CallInst::Create(MemAllocHook, Args, "", Loc);
  } else {
    BasicBlock *BB = Loc->getParent();
    BasicBlock *RestBB = BB->splitBasicBlock(Loc, "rest");
    BasicBlock *CallMallocHookBB = BasicBlock::Create(BB->getContext(),
                                                      "call_malloc_hook",
                                                      BB->getParent(),
                                                      RestBB);
    BB->getTerminator()->eraseFromParent();
    BranchInst::Create(CallMallocHookBB, RestBB, Success, BB);
    CallInst::Create(MemAllocHook, Args, "", CallMallocHookBB);
    BranchInst::Create(RestBB, CallMallocHookBB);
  }
}
Ejemplo n.º 11
0
TerminatorInst *
llvm::SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
                                bool Unreachable, MDNode *BranchWeights,
                                DominatorTree *DT, LoopInfo *LI) {
  BasicBlock *Head = SplitBefore->getParent();
  BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
  TerminatorInst *HeadOldTerm = Head->getTerminator();
  LLVMContext &C = Head->getContext();
  BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
  TerminatorInst *CheckTerm;
  if (Unreachable)
    CheckTerm = new UnreachableInst(C, ThenBlock);
  else
    CheckTerm = BranchInst::Create(Tail, ThenBlock);
  CheckTerm->setDebugLoc(SplitBefore->getDebugLoc());
  BranchInst *HeadNewTerm =
    BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cond);
  HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
  ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);

  if (DT) {
    if (DomTreeNode *OldNode = DT->getNode(Head)) {
      std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());

      DomTreeNode *NewNode = DT->addNewBlock(Tail, Head);
      for (DomTreeNode *Child : Children)
        DT->changeImmediateDominator(Child, NewNode);

      // Head dominates ThenBlock.
      DT->addNewBlock(ThenBlock, Head);
    }
  }

  if (LI) {
    if (Loop *L = LI->getLoopFor(Head)) {
      L->addBasicBlockToLoop(ThenBlock, *LI);
      L->addBasicBlockToLoop(Tail, *LI);
    }
  }

  return CheckTerm;
}
Ejemplo n.º 12
0
void StructurizeCFG::handleLoops(bool ExitUseAllowed,
                                 BasicBlock *LoopEnd) {
  RegionNode *Node = Order.front();
  BasicBlock *LoopStart = Node->getEntry();

  if (!Loops.count(LoopStart)) {
    wireFlow(ExitUseAllowed, LoopEnd);
    return;
  }

  if (!isPredictableTrue(Node))
    LoopStart = needPrefix(true);

  LoopEnd = Loops[Node->getEntry()];
  wireFlow(false, LoopEnd);
  while (!Visited.count(LoopEnd)) {
    handleLoops(false, LoopEnd);
  }

  // If the start of the loop is the entry block, we can't branch to it so
  // insert a new dummy entry block.
  Function *LoopFunc = LoopStart->getParent();
  if (LoopStart == &LoopFunc->getEntryBlock()) {
    LoopStart->setName("entry.orig");

    BasicBlock *NewEntry =
      BasicBlock::Create(LoopStart->getContext(),
                         "entry",
                         LoopFunc,
                         LoopStart);
    BranchInst::Create(LoopStart, NewEntry);
    DT->setNewRoot(NewEntry);
  }

  // Create an extra loop end node
  LoopEnd = needPrefix(false);
  BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
  LoopConds.push_back(BranchInst::Create(Next, LoopStart,
                                         BoolUndef, LoopEnd));
  addPhiValues(LoopEnd, LoopStart);
  setPrevNode(Next);
}
Ejemplo n.º 13
0
//semantics of the RTL aullshr intrinsic stub
Value *replace_aullshr(Module *M, Instruction *I, Pass *P) {

  BasicBlock *topHalf = I->getParent();
  
  BasicBlock *bottomHalf = llvm::SplitBlock(topHalf, I, P);

  BasicBlock *newBlock = BasicBlock::Create(
          topHalf->getContext(), 
          "aullshr_MainBlock",
          topHalf->getParent());

  // remove old branch, and create a new branch to newBlock
  topHalf->getTerminator()->eraseFromParent();
  BranchInst::Create(newBlock, topHalf);


  // emit aullshr and link that block to bottomHalf
  Value *new_v = emit_aullshr(newBlock, bottomHalf);


  return new_v;
}
Ejemplo n.º 14
0
/// Insert code in the prolog code when unrolling a loop with a
/// run-time trip-count.
///
/// This method assumes that the loop unroll factor is total number
/// of loop bodes in the loop after unrolling. (Some folks refer
/// to the unroll factor as the number of *extra* copies added).
/// We assume also that the loop unroll factor is a power-of-two. So, after
/// unrolling the loop, the number of loop bodies executed is 2,
/// 4, 8, etc.  Note - LLVM converts the if-then-sequence to a switch
/// instruction in SimplifyCFG.cpp.  Then, the backend decides how code for
/// the switch instruction is generated.
///
///    extraiters = tripcount % loopfactor
///    if (extraiters == 0) jump Loop:
///    if (extraiters == loopfactor) jump L1
///    if (extraiters == loopfactor-1) jump L2
///    ...
///    L1:  LoopBody;
///    L2:  LoopBody;
///    ...
///    if tripcount < loopfactor jump End
///    Loop:
///    ...
///    End:
///
bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
                                   LPPassManager *LPM) {
  // for now, only unroll loops that contain a single exit
  if (!L->getExitingBlock())
    return false;

  // Make sure the loop is in canonical form, and there is a single
  // exit block only.
  if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock())
    return false;

  // Use Scalar Evolution to compute the trip count.  This allows more
  // loops to be unrolled than relying on induction var simplification
  if (!LPM)
    return false;
  ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
  if (!SE)
    return false;

  // Only unroll loops with a computable trip count and the trip count needs
  // to be an int value (allowing a pointer type is a TODO item)
  const SCEV *BECount = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
    return false;

  // Add 1 since the backedge count doesn't include the first loop iteration
  const SCEV *TripCountSC =
    SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
  if (isa<SCEVCouldNotCompute>(TripCountSC))
    return false;

  // We only handle cases when the unroll factor is a power of 2.
  // Count is the loop unroll factor, the number of extra copies added + 1.
  if ((Count & (Count-1)) != 0)
    return false;

  // If this loop is nested, then the loop unroller changes the code in
  // parent loop, so the Scalar Evolution pass needs to be run again
  if (Loop *ParentLoop = L->getParentLoop())
    SE->forgetLoop(ParentLoop);

  BasicBlock *PH = L->getLoopPreheader();
  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
  // It helps to splits the original preheader twice, one for the end of the
  // prolog code and one for a new loop preheader
  BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass());
  BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass());
  BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());

  // Compute the number of extra iterations required, which is:
  //  extra iterations = run-time trip count % (loop unroll factor + 1)
  SCEVExpander Expander(*SE, "loop-unroll");
  Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
                                            PreHeaderBR);
  Type *CountTy = TripCount->getType();
  BinaryOperator *ModVal =
    BinaryOperator::CreateURem(TripCount,
                               ConstantInt::get(CountTy, Count),
                               "xtraiter");
  ModVal->insertBefore(PreHeaderBR);

  // Check if for no extra iterations, then jump to unrolled loop
  Value *BranchVal = new ICmpInst(PreHeaderBR,
                                  ICmpInst::ICMP_NE, ModVal,
                                  ConstantInt::get(CountTy, 0), "lcmp");
  // Branch to either the extra iterations or the unrolled loop
  // We will fix up the true branch label when adding loop body copies
  BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR);
  assert(PreHeaderBR->isUnconditional() &&
         PreHeaderBR->getSuccessor(0) == PEnd &&
         "CFG edges in Preheader are not correct");
  PreHeaderBR->eraseFromParent();

  ValueToValueMapTy LVMap;
  Function *F = Header->getParent();
  // These variables are used to update the CFG links in each iteration
  BasicBlock *CompareBB = nullptr;
  BasicBlock *LastLoopBB = PH;
  // Get an ordered list of blocks in the loop to help with the ordering of the
  // cloned blocks in the prolog code
  LoopBlocksDFS LoopBlocks(L);
  LoopBlocks.perform(LI);

  //
  // For each extra loop iteration, create a copy of the loop's basic blocks
  // and generate a condition that branches to the copy depending on the
  // number of 'left over' iterations.
  //
  for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) {
    std::vector<BasicBlock*> NewBlocks;
    ValueToValueMapTy VMap;

    // Clone all the basic blocks in the loop, but we don't clone the loop
    // This function adds the appropriate CFG connections.
    CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks,
                    LoopBlocks, VMap, LVMap, LI);
    LastLoopBB = cast<BasicBlock>(VMap[Latch]);

    // Insert the cloned blocks into function just before the original loop
    F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(),
                                  NewBlocks[0], F->end());

    // Generate the code for the comparison which determines if the loop
    // prolog code needs to be executed.
    if (leftOverIters == Count-1) {
      // There is no compare block for the fall-thru case when for the last
      // left over iteration
      CompareBB = NewBlocks[0];
    } else {
      // Create a new block for the comparison
      BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp",
                                             F, CompareBB);
      if (Loop *ParentLoop = L->getParentLoop()) {
        // Add the new block to the parent loop, if needed
        ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
      }

      // The comparison w/ the extra iteration value and branch
      Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal,
                                      ConstantInt::get(CountTy, leftOverIters),
                                      "un.tmp");
      // Branch to either the extra iterations or the unrolled loop
      BranchInst::Create(NewBlocks[0], CompareBB,
                         BranchVal, NewBB);
      CompareBB = NewBB;
      PH->getTerminator()->setSuccessor(0, NewBB);
      VMap[NewPH] = CompareBB;
    }

    // Rewrite the cloned instruction operands to use the values
    // created when the clone is created.
    for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
             E = NewBlocks[i]->end(); I != E; ++I) {
        RemapInstruction(I, VMap,
                         RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
      }
    }
  }

  // Connect the prolog code to the original loop and update the
  // PHI functions.
  ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap,
                LPM->getAsPass());
  NumRuntimeUnrolled++;
  return true;
}
/*
  in addition to the original condition of the loop, insert one cond
  on the virtual iterator, given the linf and lsup bounds for the chunk
  this cond exits and returns to the decision block to start a new chunk
*/
BasicBlock *insertChunkCond(Loop *&L, LoopInfo *LI, Value *vi, Value *lsup,
                            BasicBlock *dcb, Value *vi_dcb_val,
                            PHINode *&phi_vi) {

  BasicBlock *H = L->getHeader();

  BasicBlock *newCond = BasicBlock::Create(
      H->getContext(), Twine("__kernel__" + H->getName().str() + "_viCond"),
      H->getParent(), H);

  std::vector<BasicBlock *> Lblocks = L->getBlocks();

  BasicBlock *exitBlock = BasicBlock::Create(
      H->getContext(), Twine(H->getName().str() + "_exitChunk"), H->getParent(),
      H);
  BranchInst::Create(dcb, exitBlock);

  phi_vi = PHINode::Create(Type::getInt64Ty(H->getContext()), 2, "vi_value",
                           newCond);
  phi_vi->addIncoming(vi_dcb_val, dcb);

  LoadInst *load_lsup = new LoadInst(lsup, "lsup_value", newCond);

  ICmpInst *cmp = new ICmpInst(*newCond, ICmpInst::ICMP_SLT, phi_vi, load_lsup,
                               vi->getName() + "_cmp");

  // Make sure all predecessors now go to our new condition
  std::vector<TerminatorInst *> termInstrs;
  BasicBlock *lp = L->getLoopPredecessor();

  for (auto it = pred_begin(H), end = pred_end(H); it != end; ++it) {
    if ((*it) == lp) {
      // Original entry should be redirected to dcb
      TerminatorInst *tinstr = (*it)->getTerminator();
      for (auto it = tinstr->op_begin(), end = tinstr->op_end(); it != end;
           ++it) {
        Use *use = &*it;
        if (use->get() == H) {
          use->set(dcb);
        }
      }
    } else {
      termInstrs.push_back((*it)->getTerminator());
    }
  }

  for (auto &tinstr : termInstrs) {
    for (auto it = tinstr->op_begin(), end = tinstr->op_end(); it != end;
         ++it) {
      Use *use = &*it;
      if (use->get() == H) {
        use->set(newCond);
      }
    }
  }

  BranchInst::Create(H, exitBlock, cmp, newCond);

  // update loop info
  if (L != LI->getLoopFor(newCond)) {
    L->addBasicBlockToLoop(newCond, *LI);
  }

  L->moveToHeader(newCond);

  Loop *Lp = L->getParentLoop();
  if (Lp)
    Lp->addBasicBlockToLoop(exitBlock, *LI);
  return newCond;
}
Ejemplo n.º 16
0
/// \brief This method is called when the specified loop has more than one
/// backedge in it.
///
/// If this occurs, revector all of these backedges to target a new basic block
/// and have that block branch to the loop header.  This ensures that loops
/// have exactly one backedge.
static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
                                             DominatorTree *DT, LoopInfo *LI) {
  assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");

  // Get information about the loop
  BasicBlock *Header = L->getHeader();
  Function *F = Header->getParent();

  // Unique backedge insertion currently depends on having a preheader.
  if (!Preheader)
    return nullptr;

  // The header is not a landing pad; preheader insertion should ensure this.
  assert(!Header->isLandingPad() && "Can't insert backedge to landing pad");

  // Figure out which basic blocks contain back-edges to the loop header.
  std::vector<BasicBlock*> BackedgeBlocks;
  for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
    BasicBlock *P = *I;

    // Indirectbr edges cannot be split, so we must fail if we find one.
    if (isa<IndirectBrInst>(P->getTerminator()))
      return nullptr;

    if (P != Preheader) BackedgeBlocks.push_back(P);
  }

  // Create and insert the new backedge block...
  BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
                                           Header->getName() + ".backedge", F);
  BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
  BETerminator->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());

  DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
               << BEBlock->getName() << "\n");

  // Move the new backedge block to right after the last backedge block.
  Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos;
  F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock);

  // Now that the block has been inserted into the function, create PHI nodes in
  // the backedge block which correspond to any PHI nodes in the header block.
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    PHINode *PN = cast<PHINode>(I);
    PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(),
                                     PN->getName()+".be", BETerminator);

    // Loop over the PHI node, moving all entries except the one for the
    // preheader over to the new PHI node.
    unsigned PreheaderIdx = ~0U;
    bool HasUniqueIncomingValue = true;
    Value *UniqueValue = nullptr;
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
      BasicBlock *IBB = PN->getIncomingBlock(i);
      Value *IV = PN->getIncomingValue(i);
      if (IBB == Preheader) {
        PreheaderIdx = i;
      } else {
        NewPN->addIncoming(IV, IBB);
        if (HasUniqueIncomingValue) {
          if (!UniqueValue)
            UniqueValue = IV;
          else if (UniqueValue != IV)
            HasUniqueIncomingValue = false;
        }
      }
    }

    // Delete all of the incoming values from the old PN except the preheader's
    assert(PreheaderIdx != ~0U && "PHI has no preheader entry??");
    if (PreheaderIdx != 0) {
      PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx));
      PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx));
    }
    // Nuke all entries except the zero'th.
    for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i)
      PN->removeIncomingValue(e-i, false);

    // Finally, add the newly constructed PHI node as the entry for the BEBlock.
    PN->addIncoming(NewPN, BEBlock);

    // As an optimization, if all incoming values in the new PhiNode (which is a
    // subset of the incoming values of the old PHI node) have the same value,
    // eliminate the PHI Node.
    if (HasUniqueIncomingValue) {
      NewPN->replaceAllUsesWith(UniqueValue);
      BEBlock->getInstList().erase(NewPN);
    }
  }

  // Now that all of the PHI nodes have been inserted and adjusted, modify the
  // backedge blocks to just to the BEBlock instead of the header.
  for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
    TerminatorInst *TI = BackedgeBlocks[i]->getTerminator();
    for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op)
      if (TI->getSuccessor(Op) == Header)
        TI->setSuccessor(Op, BEBlock);
  }

  //===--- Update all analyses which we must preserve now -----------------===//

  // Update Loop Information - we know that this block is now in the current
  // loop and all parent loops.
  L->addBasicBlockToLoop(BEBlock, *LI);

  // Update dominator information
  DT->splitBlock(BEBlock);

  return BEBlock;
}
Ejemplo n.º 17
0
bool LoopIndexSplit::splitLoop() {
  SplitCondition = NULL;
  if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE
      || ExitCondition->getPredicate() == ICmpInst::ICMP_EQ)
    return false;
  BasicBlock *Header = L->getHeader();
  BasicBlock *Latch = L->getLoopLatch();
  BranchInst *SBR = NULL; // Split Condition Branch
  BranchInst *EBR = cast<BranchInst>(ExitCondition->getParent()->getTerminator());
  // If Exiting block includes loop variant instructions then this
  // loop may not be split safely.
  BasicBlock *ExitingBlock = ExitCondition->getParent();
  if (!cleanBlock(ExitingBlock)) return false;

  LLVMContext &Context = Header->getContext();

  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
       I != E; ++I) {
    BranchInst *BR = dyn_cast<BranchInst>((*I)->getTerminator());
    if (!BR || BR->isUnconditional()) continue;
    ICmpInst *CI = dyn_cast<ICmpInst>(BR->getCondition());
    if (!CI || CI == ExitCondition 
        || CI->getPredicate() == ICmpInst::ICMP_NE
        || CI->getPredicate() == ICmpInst::ICMP_EQ)
      continue;

    // Unable to handle triangle loops at the moment.
    // In triangle loop, split condition is in header and one of the
    // the split destination is loop latch. If split condition is EQ
    // then such loops are already handle in processOneIterationLoop().
    if (Header == (*I)
        && (Latch == BR->getSuccessor(0) || Latch == BR->getSuccessor(1)))
      continue;

    // If the block does not dominate the latch then this is not a diamond.
    // Such loop may not benefit from index split.
    if (!DT->dominates((*I), Latch))
      continue;

    // If split condition branches heads do not have single predecessor, 
    // SplitCondBlock, then is not possible to remove inactive branch.
    if (!BR->getSuccessor(0)->getSinglePredecessor() 
        || !BR->getSuccessor(1)->getSinglePredecessor())
      return false;

    // If the merge point for BR is not loop latch then skip this condition.
    if (BR->getSuccessor(0) != Latch) {
      DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
      assert (DF0 != DF->end() && "Unable to find dominance frontier");
      if (!DF0->second.count(Latch))
        continue;
    }
    
    if (BR->getSuccessor(1) != Latch) {
      DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
      assert (DF1 != DF->end() && "Unable to find dominance frontier");
      if (!DF1->second.count(Latch))
        continue;
    }
    SplitCondition = CI;
    SBR = BR;
    break;
  }
   
  if (!SplitCondition)
    return false;

  // If the predicate sign does not match then skip.
  if (ExitCondition->isSigned() != SplitCondition->isSigned())
    return false;

  unsigned EVOpNum = (ExitCondition->getOperand(1) == IVExitValue);
  unsigned SVOpNum = IVBasedValues.count(SplitCondition->getOperand(0));
  Value *SplitValue = SplitCondition->getOperand(SVOpNum);
  if (!L->isLoopInvariant(SplitValue))
    return false;
  if (!IVBasedValues.count(SplitCondition->getOperand(!SVOpNum)))
    return false;

  // Normalize loop conditions so that it is easier to calculate new loop
  // bounds.
  if (IVisGT(*ExitCondition) || IVisGE(*ExitCondition)) {
    ExitCondition->setPredicate(ExitCondition->getInversePredicate());
    BasicBlock *T = EBR->getSuccessor(0);
    EBR->setSuccessor(0, EBR->getSuccessor(1));
    EBR->setSuccessor(1, T);
  }

  if (IVisGT(*SplitCondition) || IVisGE(*SplitCondition)) {
    SplitCondition->setPredicate(SplitCondition->getInversePredicate());
    BasicBlock *T = SBR->getSuccessor(0);
    SBR->setSuccessor(0, SBR->getSuccessor(1));
    SBR->setSuccessor(1, T);
  }

  //[*] Calculate new loop bounds.
  Value *AEV = SplitValue;
  Value *BSV = SplitValue;
  bool Sign = SplitCondition->isSigned();
  Instruction *PHTerm = L->getLoopPreheader()->getTerminator();

  if (IVisLT(*ExitCondition)) {
    if (IVisLT(*SplitCondition)) {
      /* Do nothing */
    }
    else if (IVisLE(*SplitCondition)) {
      AEV = getPlusOne(SplitValue, Sign, PHTerm, Context);
      BSV = getPlusOne(SplitValue, Sign, PHTerm, Context);
    } else {
      assert (0 && "Unexpected split condition!");
    }
  }
  else if (IVisLE(*ExitCondition)) {
    if (IVisLT(*SplitCondition)) {
      AEV = getMinusOne(SplitValue, Sign, PHTerm, Context);
    }
    else if (IVisLE(*SplitCondition)) {
      BSV = getPlusOne(SplitValue, Sign, PHTerm, Context);
    } else {
      assert (0 && "Unexpected split condition!");
    }
  } else {
    assert (0 && "Unexpected exit condition!");
  }
  AEV = getMin(AEV, IVExitValue, Sign, PHTerm);
  BSV = getMax(BSV, IVStartValue, Sign, PHTerm);

  // [*] Clone Loop
  DenseMap<const Value *, Value *> ValueMap;
  Loop *BLoop = CloneLoop(L, LPM, LI, ValueMap, this);
  Loop *ALoop = L;

  // [*] ALoop's exiting edge enters BLoop's header.
  //    ALoop's original exit block becomes BLoop's exit block.
  PHINode *B_IndVar = cast<PHINode>(ValueMap[IndVar]);
  BasicBlock *A_ExitingBlock = ExitCondition->getParent();
  BranchInst *A_ExitInsn =
    dyn_cast<BranchInst>(A_ExitingBlock->getTerminator());
  assert (A_ExitInsn && "Unable to find suitable loop exit branch");
  BasicBlock *B_ExitBlock = A_ExitInsn->getSuccessor(1);
  BasicBlock *B_Header = BLoop->getHeader();
  if (ALoop->contains(B_ExitBlock)) {
    B_ExitBlock = A_ExitInsn->getSuccessor(0);
    A_ExitInsn->setSuccessor(0, B_Header);
  } else
    A_ExitInsn->setSuccessor(1, B_Header);

  // [*] Update ALoop's exit value using new exit value.
  ExitCondition->setOperand(EVOpNum, AEV);

  // [*] Update BLoop's header phi nodes. Remove incoming PHINode's from
  //     original loop's preheader. Add incoming PHINode values from
  //     ALoop's exiting block. Update BLoop header's domiantor info.

  // Collect inverse map of Header PHINodes.
  DenseMap<Value *, Value *> InverseMap;
  for (BasicBlock::iterator BI = ALoop->getHeader()->begin(), 
         BE = ALoop->getHeader()->end(); BI != BE; ++BI) {
    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
      PHINode *PNClone = cast<PHINode>(ValueMap[PN]);
      InverseMap[PNClone] = PN;
    } else
      break;
  }

  BasicBlock *A_Preheader = ALoop->getLoopPreheader();
  for (BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
       BI != BE; ++BI) {
    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
      // Remove incoming value from original preheader.
      PN->removeIncomingValue(A_Preheader);

      // Add incoming value from A_ExitingBlock.
      if (PN == B_IndVar)
        PN->addIncoming(BSV, A_ExitingBlock);
      else { 
        PHINode *OrigPN = cast<PHINode>(InverseMap[PN]);
        Value *V2 = NULL;
        // If loop header is also loop exiting block then
        // OrigPN is incoming value for B loop header.
        if (A_ExitingBlock == ALoop->getHeader())
          V2 = OrigPN;
        else
          V2 = OrigPN->getIncomingValueForBlock(A_ExitingBlock);
        PN->addIncoming(V2, A_ExitingBlock);
      }
    } else
      break;
  }

  DT->changeImmediateDominator(B_Header, A_ExitingBlock);
  DF->changeImmediateDominator(B_Header, A_ExitingBlock, DT);
  
  // [*] Update BLoop's exit block. Its new predecessor is BLoop's exit
  //     block. Remove incoming PHINode values from ALoop's exiting block.
  //     Add new incoming values from BLoop's incoming exiting value.
  //     Update BLoop exit block's dominator info..
  BasicBlock *B_ExitingBlock = cast<BasicBlock>(ValueMap[A_ExitingBlock]);
  for (BasicBlock::iterator BI = B_ExitBlock->begin(), BE = B_ExitBlock->end();
       BI != BE; ++BI) {
    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
      PN->addIncoming(ValueMap[PN->getIncomingValueForBlock(A_ExitingBlock)], 
                                                            B_ExitingBlock);
      PN->removeIncomingValue(A_ExitingBlock);
    } else
      break;
  }

  DT->changeImmediateDominator(B_ExitBlock, B_ExitingBlock);
  DF->changeImmediateDominator(B_ExitBlock, B_ExitingBlock, DT);

  //[*] Split ALoop's exit edge. This creates a new block which
  //    serves two purposes. First one is to hold PHINode defnitions
  //    to ensure that ALoop's LCSSA form. Second use it to act
  //    as a preheader for BLoop.
  BasicBlock *A_ExitBlock = SplitEdge(A_ExitingBlock, B_Header, this);

  //[*] Preserve ALoop's LCSSA form. Create new forwarding PHINodes
  //    in A_ExitBlock to redefine outgoing PHI definitions from ALoop.
  for(BasicBlock::iterator BI = B_Header->begin(), BE = B_Header->end();
      BI != BE; ++BI) {
    if (PHINode *PN = dyn_cast<PHINode>(BI)) {
      Value *V1 = PN->getIncomingValueForBlock(A_ExitBlock);
      PHINode *newPHI = PHINode::Create(PN->getType(), PN->getName());
      newPHI->addIncoming(V1, A_ExitingBlock);
      A_ExitBlock->getInstList().push_front(newPHI);
      PN->removeIncomingValue(A_ExitBlock);
      PN->addIncoming(newPHI, A_ExitBlock);
    } else
      break;
  }

  //[*] Eliminate split condition's inactive branch from ALoop.
  BasicBlock *A_SplitCondBlock = SplitCondition->getParent();
  BranchInst *A_BR = cast<BranchInst>(A_SplitCondBlock->getTerminator());
  BasicBlock *A_InactiveBranch = NULL;
  BasicBlock *A_ActiveBranch = NULL;
  A_ActiveBranch = A_BR->getSuccessor(0);
  A_InactiveBranch = A_BR->getSuccessor(1);
  A_BR->setUnconditionalDest(A_ActiveBranch);
  removeBlocks(A_InactiveBranch, L, A_ActiveBranch);

  //[*] Eliminate split condition's inactive branch in from BLoop.
  BasicBlock *B_SplitCondBlock = cast<BasicBlock>(ValueMap[A_SplitCondBlock]);
  BranchInst *B_BR = cast<BranchInst>(B_SplitCondBlock->getTerminator());
  BasicBlock *B_InactiveBranch = NULL;
  BasicBlock *B_ActiveBranch = NULL;
  B_ActiveBranch = B_BR->getSuccessor(1);
  B_InactiveBranch = B_BR->getSuccessor(0);
  B_BR->setUnconditionalDest(B_ActiveBranch);
  removeBlocks(B_InactiveBranch, BLoop, B_ActiveBranch);

  BasicBlock *A_Header = ALoop->getHeader();
  if (A_ExitingBlock == A_Header)
    return true;

  //[*] Move exit condition into split condition block to avoid
  //    executing dead loop iteration.
  ICmpInst *B_ExitCondition = cast<ICmpInst>(ValueMap[ExitCondition]);
  Instruction *B_IndVarIncrement = cast<Instruction>(ValueMap[IVIncrement]);
  ICmpInst *B_SplitCondition = cast<ICmpInst>(ValueMap[SplitCondition]);

  moveExitCondition(A_SplitCondBlock, A_ActiveBranch, A_ExitBlock, ExitCondition,
                    cast<ICmpInst>(SplitCondition), IndVar, IVIncrement, 
                    ALoop, EVOpNum);

  moveExitCondition(B_SplitCondBlock, B_ActiveBranch, 
                    B_ExitBlock, B_ExitCondition,
                    B_SplitCondition, B_IndVar, B_IndVarIncrement, 
                    BLoop, EVOpNum);

  NumIndexSplit++;
  return true;
}
Ejemplo n.º 18
0
bool HexagonGenExtract::convert(Instruction *In) {
  using namespace PatternMatch;

  Value *BF = nullptr;
  ConstantInt *CSL = nullptr, *CSR = nullptr, *CM = nullptr;
  BasicBlock *BB = In->getParent();
  LLVMContext &Ctx = BB->getContext();
  bool LogicalSR;

  // (and (shl (lshr x, #sr), #sl), #m)
  LogicalSR = true;
  bool Match = match(In, m_And(m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
                               m_ConstantInt(CSL)),
                         m_ConstantInt(CM)));

  if (!Match) {
    // (and (shl (ashr x, #sr), #sl), #m)
    LogicalSR = false;
    Match = match(In, m_And(m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
                            m_ConstantInt(CSL)),
                      m_ConstantInt(CM)));
  }
  if (!Match) {
    // (and (shl x, #sl), #m)
    LogicalSR = true;
    CSR = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
    Match = match(In, m_And(m_Shl(m_Value(BF), m_ConstantInt(CSL)),
                      m_ConstantInt(CM)));
    if (Match && NoSR0)
      return false;
  }
  if (!Match) {
    // (and (lshr x, #sr), #m)
    LogicalSR = true;
    CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
    Match = match(In, m_And(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
                            m_ConstantInt(CM)));
  }
  if (!Match) {
    // (and (ashr x, #sr), #m)
    LogicalSR = false;
    CSL = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
    Match = match(In, m_And(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
                            m_ConstantInt(CM)));
  }
  if (!Match) {
    CM = nullptr;
    // (shl (lshr x, #sr), #sl)
    LogicalSR = true;
    Match = match(In, m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
                            m_ConstantInt(CSL)));
  }
  if (!Match) {
    CM = nullptr;
    // (shl (ashr x, #sr), #sl)
    LogicalSR = false;
    Match = match(In, m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
                            m_ConstantInt(CSL)));
  }
  if (!Match)
    return false;

  Type *Ty = BF->getType();
  if (!Ty->isIntegerTy())
    return false;
  unsigned BW = Ty->getPrimitiveSizeInBits();
  if (BW != 32 && BW != 64)
    return false;

  uint32_t SR = CSR->getZExtValue();
  uint32_t SL = CSL->getZExtValue();

  if (!CM) {
    // If there was no and, and the shift left did not remove all potential
    // sign bits created by the shift right, then extractu cannot reproduce
    // this value.
    if (!LogicalSR && (SR > SL))
      return false;
    APInt A = APInt(BW, ~0ULL).lshr(SR).shl(SL);
    CM = ConstantInt::get(Ctx, A);
  }

  // CM is the shifted-left mask. Shift it back right to remove the zero
  // bits on least-significant positions.
  APInt M = CM->getValue().lshr(SL);
  uint32_t T = M.countTrailingOnes();

  // During the shifts some of the bits will be lost. Calculate how many
  // of the original value will remain after shift right and then left.
  uint32_t U = BW - std::max(SL, SR);
  // The width of the extracted field is the minimum of the original bits
  // that remain after the shifts and the number of contiguous 1s in the mask.
  uint32_t W = std::min(U, T);
  if (W == 0)
    return false;

  // Check if the extracted bits are contained within the mask that it is
  // and-ed with. The extract operation will copy these bits, and so the
  // mask cannot any holes in it that would clear any of the bits of the
  // extracted field.
  if (!LogicalSR) {
    // If the shift right was arithmetic, it could have included some 1 bits.
    // It is still ok to generate extract, but only if the mask eliminates
    // those bits (i.e. M does not have any bits set beyond U).
    APInt C = APInt::getHighBitsSet(BW, BW-U);
    if (M.intersects(C) || !M.isMask(W))
      return false;
  } else {
    // Check if M starts with a contiguous sequence of W times 1 bits. Get
    // the low U bits of M (which eliminates the 0 bits shifted in on the
    // left), and check if the result is APInt's "mask":
    if (!M.getLoBits(U).isMask(W))
      return false;
  }

  IRBuilder<> IRB(In);
  Intrinsic::ID IntId = (BW == 32) ? Intrinsic::hexagon_S2_extractu
                                   : Intrinsic::hexagon_S2_extractup;
  Module *Mod = BB->getParent()->getParent();
  Value *ExtF = Intrinsic::getDeclaration(Mod, IntId);
  Value *NewIn = IRB.CreateCall(ExtF, {BF, IRB.getInt32(W), IRB.getInt32(SR)});
  if (SL != 0)
    NewIn = IRB.CreateShl(NewIn, SL, CSL->getName());
  In->replaceAllUsesWith(NewIn);
  return true;
}
Ejemplo n.º 19
0
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
/// if unrolling was succesful, or false if the loop was unmodified. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
/// removed from the LoopPassManager as well. LPM can also be NULL.
bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) {
  assert(L->isLCSSAForm());

  BasicBlock *Header = L->getHeader();
  BasicBlock *LatchBlock = L->getLoopLatch();
  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
  
  if (!BI || BI->isUnconditional()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DOUT << "  Can't unroll; loop not terminated by a conditional branch.\n";
    return false;
  }

  // Find trip count
  unsigned TripCount = L->getSmallConstantTripCount();
  // Find trip multiple if count is not available
  unsigned TripMultiple = 1;
  if (TripCount == 0)
    TripMultiple = L->getSmallConstantTripMultiple();

  if (TripCount != 0)
    DOUT << "  Trip Count = " << TripCount << "\n";
  if (TripMultiple != 1)
    DOUT << "  Trip Multiple = " << TripMultiple << "\n";

  // Effectively "DCE" unrolled iterations that are beyond the tripcount
  // and will never be executed.
  if (TripCount != 0 && Count > TripCount)
    Count = TripCount;

  assert(Count > 0);
  assert(TripMultiple > 0);
  assert(TripCount == 0 || TripCount % TripMultiple == 0);

  // Are we eliminating the loop control altogether?
  bool CompletelyUnroll = Count == TripCount;

  // If we know the trip count, we know the multiple...
  unsigned BreakoutTrip = 0;
  if (TripCount != 0) {
    BreakoutTrip = TripCount % Count;
    TripMultiple = 0;
  } else {
    // Figure out what multiple to use.
    BreakoutTrip = TripMultiple =
      (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
  }

  if (CompletelyUnroll) {
    DEBUG(errs() << "COMPLETELY UNROLLING loop %" << Header->getName()
          << " with trip count " << TripCount << "!\n");
  } else {
    DEBUG(errs() << "UNROLLING loop %" << Header->getName()
          << " by " << Count);
    if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
      DOUT << " with a breakout at trip " << BreakoutTrip;
    } else if (TripMultiple != 1) {
      DOUT << " with " << TripMultiple << " trips per branch";
    }
    DOUT << "!\n";
  }

  std::vector<BasicBlock*> LoopBlocks = L->getBlocks();

  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);

  // For the first iteration of the loop, we should use the precloned values for
  // PHI nodes.  Insert associations now.
  typedef DenseMap<const Value*, Value*> ValueMapTy;
  ValueMapTy LastValueMap;
  std::vector<PHINode*> OrigPHINode;
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    PHINode *PN = cast<PHINode>(I);
    OrigPHINode.push_back(PN);
    if (Instruction *I = 
                dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)))
      if (L->contains(I->getParent()))
        LastValueMap[I] = I;
  }

  std::vector<BasicBlock*> Headers;
  std::vector<BasicBlock*> Latches;
  Headers.push_back(Header);
  Latches.push_back(LatchBlock);

  for (unsigned It = 1; It != Count; ++It) {
    char SuffixBuffer[100];
    sprintf(SuffixBuffer, ".%d", It);
    
    std::vector<BasicBlock*> NewBlocks;
    
    for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(),
         E = LoopBlocks.end(); BB != E; ++BB) {
      ValueMapTy ValueMap;
      BasicBlock *New = CloneBasicBlock(*BB, ValueMap, SuffixBuffer);
      Header->getParent()->getBasicBlockList().push_back(New);

      // Loop over all of the PHI nodes in the block, changing them to use the
      // incoming values from the previous block.
      if (*BB == Header)
        for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
          PHINode *NewPHI = cast<PHINode>(ValueMap[OrigPHINode[i]]);
          Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
          if (Instruction *InValI = dyn_cast<Instruction>(InVal))
            if (It > 1 && L->contains(InValI->getParent()))
              InVal = LastValueMap[InValI];
          ValueMap[OrigPHINode[i]] = InVal;
          New->getInstList().erase(NewPHI);
        }

      // Update our running map of newest clones
      LastValueMap[*BB] = New;
      for (ValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end();
           VI != VE; ++VI)
        LastValueMap[VI->first] = VI->second;

      L->addBasicBlockToLoop(New, LI->getBase());

      // Add phi entries for newly created values to all exit blocks except
      // the successor of the latch block.  The successor of the exit block will
      // be updated specially after unrolling all the way.
      if (*BB != LatchBlock)
        for (Value::use_iterator UI = (*BB)->use_begin(), UE = (*BB)->use_end();
             UI != UE;) {
          Instruction *UseInst = cast<Instruction>(*UI);
          ++UI;
          if (isa<PHINode>(UseInst) && !L->contains(UseInst->getParent())) {
            PHINode *phi = cast<PHINode>(UseInst);
            Value *Incoming = phi->getIncomingValueForBlock(*BB);
            phi->addIncoming(Incoming, New);
          }
        }

      // Keep track of new headers and latches as we create them, so that
      // we can insert the proper branches later.
      if (*BB == Header)
        Headers.push_back(New);
      if (*BB == LatchBlock) {
        Latches.push_back(New);

        // Also, clear out the new latch's back edge so that it doesn't look
        // like a new loop, so that it's amenable to being merged with adjacent
        // blocks later on.
        TerminatorInst *Term = New->getTerminator();
        assert(L->contains(Term->getSuccessor(!ContinueOnTrue)));
        assert(Term->getSuccessor(ContinueOnTrue) == LoopExit);
        Term->setSuccessor(!ContinueOnTrue, NULL);
      }

      NewBlocks.push_back(New);
    }
    
    // Remap all instructions in the most recent iteration
    for (unsigned i = 0; i < NewBlocks.size(); ++i)
      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
           E = NewBlocks[i]->end(); I != E; ++I)
        RemapInstruction(I, LastValueMap);
  }
  
  // The latch block exits the loop.  If there are any PHI nodes in the
  // successor blocks, update them to use the appropriate values computed as the
  // last iteration of the loop.
  if (Count != 1) {
    SmallPtrSet<PHINode*, 8> Users;
    for (Value::use_iterator UI = LatchBlock->use_begin(),
         UE = LatchBlock->use_end(); UI != UE; ++UI)
      if (PHINode *phi = dyn_cast<PHINode>(*UI))
        Users.insert(phi);
    
    BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]);
    for (SmallPtrSet<PHINode*,8>::iterator SI = Users.begin(), SE = Users.end();
         SI != SE; ++SI) {
      PHINode *PN = *SI;
      Value *InVal = PN->removeIncomingValue(LatchBlock, false);
      // If this value was defined in the loop, take the value defined by the
      // last iteration of the loop.
      if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
        if (L->contains(InValI->getParent()))
          InVal = LastValueMap[InVal];
      }
      PN->addIncoming(InVal, LastIterationBB);
    }
  }

  // Now, if we're doing complete unrolling, loop over the PHI nodes in the
  // original block, setting them to their incoming values.
  if (CompletelyUnroll) {
    BasicBlock *Preheader = L->getLoopPreheader();
    for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
      PHINode *PN = OrigPHINode[i];
      PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
      Header->getInstList().erase(PN);
    }
  }

  // Now that all the basic blocks for the unrolled iterations are in place,
  // set up the branches to connect them.
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    // The original branch was replicated in each unrolled iteration.
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());

    // The branch destination.
    unsigned j = (i + 1) % e;
    BasicBlock *Dest = Headers[j];
    bool NeedConditional = true;

    // For a complete unroll, make the last iteration end with a branch
    // to the exit block.
    if (CompletelyUnroll && j == 0) {
      Dest = LoopExit;
      NeedConditional = false;
    }

    // If we know the trip count or a multiple of it, we can safely use an
    // unconditional branch for some iterations.
    if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
      NeedConditional = false;
    }

    if (NeedConditional) {
      // Update the conditional branch's successor for the following
      // iteration.
      Term->setSuccessor(!ContinueOnTrue, Dest);
    } else {
      Term->setUnconditionalDest(Dest);
      // Merge adjacent basic blocks, if possible.
      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) {
        std::replace(Latches.begin(), Latches.end(), Dest, Fold);
        std::replace(Headers.begin(), Headers.end(), Dest, Fold);
      }
    }
  }
  
  // At this point, the code is well formed.  We now do a quick sweep over the
  // inserted code, doing constant propagation and dead code elimination as we
  // go.
  const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
  for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
       BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
      Instruction *Inst = I++;

      if (isInstructionTriviallyDead(Inst))
        (*BB)->getInstList().erase(Inst);
      else if (Constant *C = ConstantFoldInstruction(Inst, 
                                                     Header->getContext())) {
        Inst->replaceAllUsesWith(C);
        (*BB)->getInstList().erase(Inst);
      }
    }

  NumCompletelyUnrolled += CompletelyUnroll;
  ++NumUnrolled;
  // Remove the loop from the LoopPassManager if it's completely removed.
  if (CompletelyUnroll && LPM != NULL)
    LPM->deleteLoopFromQueue(L);

  // If we didn't completely unroll the loop, it should still be in LCSSA form.
  if (!CompletelyUnroll)
    assert(L->isLCSSAForm());

  return true;
}
Ejemplo n.º 20
0
/// If \param [in] BB has more than one predecessor that is a conditional
/// branch, attempt to use parallel and/or for the branch condition. \returns
/// true on success.
///
/// Before:
///   ......
///   %cmp10 = fcmp une float %tmp1, %tmp2
///   br i1 %cmp1, label %if.then, label %lor.rhs
///
/// lor.rhs:
///   ......
///   %cmp11 = fcmp une float %tmp3, %tmp4
///   br i1 %cmp11, label %if.then, label %ifend
///
/// if.end:  // the merge block
///   ......
///
/// if.then: // has two predecessors, both of them contains conditional branch.
///   ......
///   br label %if.end;
///
/// After:
///  ......
///  %cmp10 = fcmp une float %tmp1, %tmp2
///  ......
///  %cmp11 = fcmp une float %tmp3, %tmp4
///  %cmp12 = or i1 %cmp10, %cmp11    // parallel-or mode.
///  br i1 %cmp12, label %if.then, label %ifend
///
///  if.end:
///    ......
///
///  if.then:
///    ......
///    br label %if.end;
///
///  Current implementation handles two cases.
///  Case 1: \param BB is on the else-path.
///
///          BB1
///        /     |
///       BB2    |
///      /   \   |
///     BB3   \  |     where, BB1, BB2 contain conditional branches.
///      \    |  /     BB3 contains unconditional branch.
///       \   | /      BB4 corresponds to \param BB which is also the merge.
///  BB => BB4
///
///
///  Corresponding source code:
///
///  if (a == b && c == d)
///    statement; // BB3
///
///  Case 2: \param BB BB is on the then-path.
///
///             BB1
///          /      |
///         |      BB2
///         \    /    |  where BB1, BB2 contain conditional branches.
///  BB =>   BB3      |  BB3 contains unconditiona branch and corresponds
///           \     /    to \param BB.  BB4 is the merge.
///             BB4
///
///  Corresponding source code:
///
///  if (a == b || c == d)
///    statement;  // BB3
///
///  In both cases,  \param BB is the common successor of conditional branches.
///  In Case 1, \param BB (BB4) has an unconditional branch (BB3) as
///  its predecessor.  In Case 2, \param BB (BB3) only has conditional branches
///  as its predecessors.
///
bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
                                         Pass *P) {
  PHINode *PHI = dyn_cast<PHINode>(BB->begin());
  if (PHI)
    return false; // For simplicity, avoid cases containing PHI nodes.

  BasicBlock *LastCondBlock = NULL;
  BasicBlock *FirstCondBlock = NULL;
  BasicBlock *UnCondBlock = NULL;
  int Idx = -1;

  // Check predecessors of \param BB.
  SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
  for (SmallPtrSetIterator<BasicBlock *> PI = Preds.begin(), PE = Preds.end();
       PI != PE; ++PI) {
    BasicBlock *Pred = *PI;
    BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator());

    // All predecessors should terminate with a branch.
    if (!PBI)
      return false;

    BasicBlock *PP = Pred->getSinglePredecessor();

    if (PBI->isUnconditional()) {
      // Case 1: Pred (BB3) is an unconditional block, it should
      // have a single predecessor (BB2) that is also a predecessor
      // of \param BB (BB4) and should not have address-taken.
      // There should exist only one such unconditional
      // branch among the predecessors.
      if (UnCondBlock || !PP || (Preds.count(PP) == 0) ||
          Pred->hasAddressTaken())
        return false;

      UnCondBlock = Pred;
      continue;
    }

    // Only conditional branches are allowed beyond this point.
    assert(PBI->isConditional());

    // Condition's unique use should be the branch instruction.
    Value *PC = PBI->getCondition();
    if (!PC || !PC->hasOneUse())
      return false;

    if (PP && Preds.count(PP)) {
      // These are internal condition blocks to be merged from, e.g.,
      // BB2 in both cases.
      // Should not be address-taken.
      if (Pred->hasAddressTaken())
        return false;

      // Instructions in the internal condition blocks should be safe
      // to hoist up.
      for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) {
        Instruction *CI = BI++;
        if (isa<PHINode>(CI) || !isSafeToSpeculativelyExecute(CI))
          return false;
      }
    } else {
      // This is the condition block to be merged into, e.g. BB1 in
      // both cases.
      if (FirstCondBlock)
        return false;
      FirstCondBlock = Pred;
    }

    // Find whether BB is uniformly on the true (or false) path
    // for all of its predecessors.
    BasicBlock *PS1 = PBI->getSuccessor(0);
    BasicBlock *PS2 = PBI->getSuccessor(1);
    BasicBlock *PS = (PS1 == BB) ? PS2 : PS1;
    int CIdx = (PS1 == BB) ? 0 : 1;

    if (Idx == -1)
      Idx = CIdx;
    else if (CIdx != Idx)
      return false;

    // PS is the successor which is not BB. Check successors to identify
    // the last conditional branch.
    if (Preds.count(PS) == 0) {
      // Case 2.
      LastCondBlock = Pred;
    } else {
      // Case 1
      BranchInst *BPS = dyn_cast<BranchInst>(PS->getTerminator());
      if (BPS && BPS->isUnconditional()) {
        // Case 1: PS(BB3) should be an unconditional branch.
        LastCondBlock = Pred;
      }
    }
  }

  if (!FirstCondBlock || !LastCondBlock || (FirstCondBlock == LastCondBlock))
    return false;

  TerminatorInst *TBB = LastCondBlock->getTerminator();
  BasicBlock *PS1 = TBB->getSuccessor(0);
  BasicBlock *PS2 = TBB->getSuccessor(1);
  BranchInst *PBI1 = dyn_cast<BranchInst>(PS1->getTerminator());
  BranchInst *PBI2 = dyn_cast<BranchInst>(PS2->getTerminator());

  // If PS1 does not jump into PS2, but PS2 jumps into PS1,
  // attempt branch inversion.
  if (!PBI1 || !PBI1->isUnconditional() ||
      (PS1->getTerminator()->getSuccessor(0) != PS2)) {
    // Check whether PS2 jumps into PS1.
    if (!PBI2 || !PBI2->isUnconditional() ||
        (PS2->getTerminator()->getSuccessor(0) != PS1))
      return false;

    // Do branch inversion.
    BasicBlock *CurrBlock = LastCondBlock;
    bool EverChanged = false;
    while (1) {
      BranchInst *BI = dyn_cast<BranchInst>(CurrBlock->getTerminator());
      CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
      CmpInst::Predicate Predicate = CI->getPredicate();
      // Cannonicalize icmp_ne -> icmp_eq, fcmp_one -> fcmp_oeq
      if ((Predicate == CmpInst::ICMP_NE) || (Predicate == CmpInst::FCMP_ONE)) {
        CI->setPredicate(ICmpInst::getInversePredicate(Predicate));
        BI->swapSuccessors();
        EverChanged = true;
      }
      if (CurrBlock == FirstCondBlock)
        break;
      CurrBlock = CurrBlock->getSinglePredecessor();
    }
    return EverChanged;
  }

  // PS1 must have a conditional branch.
  if (!PBI1 || !PBI1->isUnconditional())
    return false;

  // PS2 should not contain PHI node.
  PHI = dyn_cast<PHINode>(PS2->begin());
  if (PHI)
    return false;

  // Do the transformation.
  BasicBlock *CB;
  BranchInst *PBI = dyn_cast<BranchInst>(FirstCondBlock->getTerminator());
  bool Iteration = true;
  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
  Value *PC = PBI->getCondition();

  do {
    CB = PBI->getSuccessor(1 - Idx);
    // Delete the conditional branch.
    FirstCondBlock->getInstList().pop_back();
    FirstCondBlock->getInstList()
        .splice(FirstCondBlock->end(), CB->getInstList());
    PBI = cast<BranchInst>(FirstCondBlock->getTerminator());
    Value *CC = PBI->getCondition();
    // Merge conditions.
    Builder.SetInsertPoint(PBI);
    Value *NC;
    if (Idx == 0)
      // Case 2, use parallel or.
      NC = Builder.CreateOr(PC, CC);
    else
      // Case 1, use parallel and.
      NC = Builder.CreateAnd(PC, CC);

    PBI->replaceUsesOfWith(CC, NC);
    PC = NC;
    if (CB == LastCondBlock)
      Iteration = false;
    // Remove internal conditional branches.
    CB->dropAllReferences();
    // make CB unreachable and let downstream to delete the block.
    new UnreachableInst(CB->getContext(), CB);
  } while (Iteration);

  Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
  DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);
  return true;
}
Ejemplo n.º 21
0
void LowerSIMDLoop::enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) const
{
    typedef SmallVector<Instruction*, 8> chainVector;
    chainVector chain;
    Instruction *J;
    unsigned opcode = 0;
    for (Instruction *I = Phi; ; I=J) {
        J = NULL;
        // Find the user of instruction I that is within loop L.
#if JL_LLVM_VERSION >= 30500
        for (User *UI : I->users()) { /*}*/
            Instruction *U = cast<Instruction>(UI);
#else
        for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; ++UI) {
            Instruction *U = cast<Instruction>(*UI);
#endif
            if (L->contains(U)) {
                if (J) {
                    DEBUG(dbgs() << "LSL: not a reduction var because op has two internal uses: " << *I << "\n");
                    return;
                }
                J = U;
            }
        }
        if (!J) {
            DEBUG(dbgs() << "LSL: chain prematurely terminated at " << *I << "\n");
            return;
        }
        if (J==Phi) {
            // Found the entire chain.
            break;
        }
        if (opcode) {
            // Check that arithmetic op matches prior arithmetic ops in the chain.
            if (J->getOpcode()!=opcode) {
                DEBUG(dbgs() << "LSL: chain broke at " << *J << " because of wrong opcode\n");
                return;
            }
        }
        else {
            // First arithmetic op in the chain.
            opcode = J->getOpcode();
            if (opcode!=Instruction::FAdd && opcode!=Instruction::FMul) {
                DEBUG(dbgs() << "LSL: first arithmetic op in chain is uninteresting" << *J << "\n");
                return;
            }
        }
        chain.push_back(J);
    }
    for (chainVector::const_iterator K=chain.begin(); K!=chain.end(); ++K) {
        DEBUG(dbgs() << "LSL: marking " << **K << "\n");
        (*K)->setHasUnsafeAlgebra(true);
    }
}

bool LowerSIMDLoop::runOnLoop(Loop *L, LPPassManager &LPM)
{
    if (!simd_loop_mdkind) {
        simd_loop_mdkind = L->getHeader()->getContext().getMDKindID("simd_loop");
#if JL_LLVM_VERSION >= 30600
        simd_loop_md = MDNode::get(L->getHeader()->getContext(), ArrayRef<Metadata*>());
#else
        simd_loop_md = MDNode::get(L->getHeader()->getContext(), ArrayRef<Value*>());
#endif
    }

    if (!hasSIMDLoopMetadata(L))
        return false;

    DEBUG(dbgs() << "LSL: simd_loop found\n");
    BasicBlock *Lh = L->getHeader();
    DEBUG(dbgs() << "LSL: loop header: " << *Lh << "\n");
#if JL_LLVM_VERSION >= 30400
    MDNode *n = L->getLoopID();
    if (!n) {
        // Loop does not have a LoopID yet, so give it one.
#if JL_LLVM_VERSION >= 30600
        n = MDNode::get(Lh->getContext(), ArrayRef<Metadata*>(NULL));
#else
        n = MDNode::get(Lh->getContext(), ArrayRef<Value*>(NULL));
#endif
        n->replaceOperandWith(0,n);
        L->setLoopID(n);
    }
#else
    MDNode *n = MDNode::get(Lh->getContext(), ArrayRef<Value*>());
    L->getLoopLatch()->getTerminator()->setMetadata("llvm.loop.parallel", n);
#endif
#if JL_LLVM_VERSION >= 30600
    MDNode *m = MDNode::get(Lh->getContext(), ArrayRef<Metadata*>(n));
#else
    MDNode *m = MDNode::get(Lh->getContext(), ArrayRef<Value*>(n));
#endif

    // Mark memory references so that Loop::isAnnotatedParallel will return true for this loop.
    for(Loop::block_iterator BBI = L->block_begin(), E=L->block_end(); BBI!=E; ++BBI)
        for (BasicBlock::iterator I = (*BBI)->begin(), EE = (*BBI)->end(); I!=EE; ++I)
            if (I->mayReadOrWriteMemory())
                I->setMetadata("llvm.mem.parallel_loop_access", m);
    assert(L->isAnnotatedParallel());

    // Mark floating-point reductions as okay to reassociate/commute.
    for (BasicBlock::iterator I = Lh->begin(), E = Lh->end(); I!=E; ++I)
        if (PHINode *Phi = dyn_cast<PHINode>(I))
            enableUnsafeAlgebraIfReduction(Phi,L);

    return true;
}

char LowerSIMDLoop::ID = 0;

static RegisterPass<LowerSIMDLoop> X("LowerSIMDLoop", "LowerSIMDLoop Pass",
                                     false /* Only looks at CFG */,
                                     false /* Analysis Pass */);

JL_DLLEXPORT Pass *createLowerSimdLoopPass()
{
    return new LowerSIMDLoop();
}

} // namespace llvm
Ejemplo n.º 22
0
void WorklessInstrument::CloneInnerLoop(Loop * pLoop, vector<BasicBlock *> & vecAdd, ValueToValueMapTy & VMap, set<BasicBlock *> & setCloned)
{
	Function * pFunction = pLoop->getHeader()->getParent();
	BasicBlock * pPreHeader = vecAdd[0];

	SmallVector<BasicBlock *, 4> ExitBlocks;
	pLoop->getExitBlocks(ExitBlocks);

	set<BasicBlock *> setExitBlocks;

	for(unsigned long i = 0; i < ExitBlocks.size(); i++)
	{
		setExitBlocks.insert(ExitBlocks[i]);
	}

	for(unsigned long i = 0; i < ExitBlocks.size(); i++ )
	{
		VMap[ExitBlocks[i]] = ExitBlocks[i];
	}

	vector<BasicBlock *> ToClone;
	vector<BasicBlock *> BeenCloned;

	
	//clone loop
	ToClone.push_back(pLoop->getHeader());

	while(ToClone.size()>0)
	{
		BasicBlock * pCurrent = ToClone.back();
		ToClone.pop_back();

		WeakVH & BBEntry = VMap[pCurrent];
		if (BBEntry)
		{
			continue;
		}

		BasicBlock * NewBB;
		BBEntry = NewBB = BasicBlock::Create(pCurrent->getContext(), "", pFunction);

		if(pCurrent->hasName())
		{
			NewBB->setName(pCurrent->getName() + ".CPI");
		}

		if(pCurrent->hasAddressTaken())
		{
			errs() << "hasAddressTaken branch\n" ;
			exit(0);
		}

		for(BasicBlock::const_iterator II = pCurrent->begin(); II != pCurrent->end(); ++II )
		{
			Instruction * NewInst = II->clone();
			if(II->hasName())
			{
				NewInst->setName(II->getName() + ".CPI");
			}
			VMap[II] = NewInst;
			NewBB->getInstList().push_back(NewInst);
		}

		const TerminatorInst *TI = pCurrent->getTerminator();
		for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
		{
			ToClone.push_back(TI->getSuccessor(i));
		}

		setCloned.insert(NewBB);
		BeenCloned.push_back(NewBB);
	}

	//remap value used inside loop
	vector<BasicBlock *>::iterator itVecBegin = BeenCloned.begin();
	vector<BasicBlock *>::iterator itVecEnd = BeenCloned.end();

	for(; itVecBegin != itVecEnd; itVecBegin ++)
	{
		for(BasicBlock::iterator II = (*itVecBegin)->begin(); II != (*itVecBegin)->end(); II ++ )
		{
			//II->dump();
			RemapInstruction(II, VMap);
		}
	}

	//add to the else if body
	BasicBlock * pElseBody = vecAdd[1];

	BasicBlock * pClonedHeader = cast<BasicBlock>(VMap[pLoop->getHeader()]);

	BranchInst::Create(pClonedHeader, pElseBody);

	//errs() << pPreHeader->getName() << "\n";
	for(BasicBlock::iterator II = pClonedHeader->begin(); II != pClonedHeader->end(); II ++ )
	{
		if(PHINode * pPHI = dyn_cast<PHINode>(II))
		{
			vector<int> vecToRemoved;
			for (unsigned i = 0, e = pPHI->getNumIncomingValues(); i != e; ++i) 
			{
				if(pPHI->getIncomingBlock(i) == pPreHeader)
				{
					pPHI->setIncomingBlock(i, pElseBody);
				}
			}
		}
	}

	set<BasicBlock *> setProcessedBlock;

	for(unsigned long i = 0; i < ExitBlocks.size(); i++ )
	{
		if(setProcessedBlock.find(ExitBlocks[i]) != setProcessedBlock.end() )
		{
			continue;
		}
		else
		{
			setProcessedBlock.insert(ExitBlocks[i]);
		}

		for(BasicBlock::iterator II = ExitBlocks[i]->begin(); II != ExitBlocks[i]->end(); II ++ )
		{
			if(PHINode * pPHI = dyn_cast<PHINode>(II))
			{
				unsigned numIncomming = pPHI->getNumIncomingValues();
				for(unsigned i = 0; i<numIncomming; i++)
				{
					BasicBlock * incommingBlock = pPHI->getIncomingBlock(i);
					if(VMap.find(incommingBlock) != VMap.end() )
					{
						Value * incommingValue = pPHI->getIncomingValue(i);

						if(VMap.find(incommingValue) != VMap.end() )
						{
							incommingValue = VMap[incommingValue];
						}

						pPHI->addIncoming(incommingValue, cast<BasicBlock>(VMap[incommingBlock]));

					}
				} 

			}
		}
	}
}