Esempio n. 1
0
/// updateLoopIterationSpace -- Update loop's iteration space if loop 
/// body is executed for certain IV range only. For example,
/// 
/// for (i = 0; i < N; ++i) {
///   if ( i > A && i < B) {
///     ...
///   }
/// }
/// is transformed to iterators from A to B, if A > 0 and B < N.
///
bool LoopIndexSplit::updateLoopIterationSpace() {
  SplitCondition = NULL;
  if (ExitCondition->getPredicate() == ICmpInst::ICMP_NE
      || ExitCondition->getPredicate() == ICmpInst::ICMP_EQ)
    return false;
  BasicBlock *Latch = L->getLoopLatch();
  BasicBlock *Header = L->getHeader();
  BranchInst *BR = dyn_cast<BranchInst>(Header->getTerminator());
  if (!BR) return false;
  if (!isa<BranchInst>(Latch->getTerminator())) return false;
  if (BR->isUnconditional()) return false;
  BinaryOperator *AND = dyn_cast<BinaryOperator>(BR->getCondition());
  if (!AND) return false;
  if (AND->getOpcode() != Instruction::And) return false;
  ICmpInst *Op0 = dyn_cast<ICmpInst>(AND->getOperand(0));
  ICmpInst *Op1 = dyn_cast<ICmpInst>(AND->getOperand(1));
  if (!Op0 || !Op1)
    return false;
  IVBasedValues.insert(AND);
  IVBasedValues.insert(Op0);
  IVBasedValues.insert(Op1);
  if (!cleanBlock(Header)) return false;
  BasicBlock *ExitingBlock = ExitCondition->getParent();
  if (!cleanBlock(ExitingBlock)) return false;

  // If the merge point for BR is not loop latch then skip this loop.
  if (BR->getSuccessor(0) != Latch) {
    DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
    assert (DF0 != DF->end() && "Unable to find dominance frontier");
    if (!DF0->second.count(Latch))
      return false;
  }
  
  if (BR->getSuccessor(1) != Latch) {
    DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
    assert (DF1 != DF->end() && "Unable to find dominance frontier");
    if (!DF1->second.count(Latch))
      return false;
  }
    
  // Verify that loop exiting block has only two predecessor, where one pred
  // is split condition block. The other predecessor will become exiting block's
  // dominator after CFG is updated. TODO : Handle CFG's where exiting block has
  // more then two predecessors. This requires extra work in updating dominator
  // information.
  BasicBlock *ExitingBBPred = NULL;
  for (pred_iterator PI = pred_begin(ExitingBlock), PE = pred_end(ExitingBlock);
       PI != PE; ++PI) {
    BasicBlock *BB = *PI;
    if (Header == BB)
      continue;
    if (ExitingBBPred)
      return false;
    else
      ExitingBBPred = BB;
  }

  if (!restrictLoopBound(*Op0))
    return false;

  if (!restrictLoopBound(*Op1))
    return false;

  // Update CFG.
  if (BR->getSuccessor(0) == ExitingBlock)
    BR->setUnconditionalDest(BR->getSuccessor(1));
  else
    BR->setUnconditionalDest(BR->getSuccessor(0));

  AND->eraseFromParent();
  if (Op0->use_empty())
    Op0->eraseFromParent();
  if (Op1->use_empty())
    Op1->eraseFromParent();

  // Update domiantor info. Now, ExitingBlock has only one predecessor, 
  // ExitingBBPred, and it is ExitingBlock's immediate domiantor.
  DT->changeImmediateDominator(ExitingBlock, ExitingBBPred);

  BasicBlock *ExitBlock = ExitingBlock->getTerminator()->getSuccessor(1);
  if (L->contains(ExitBlock))
    ExitBlock = ExitingBlock->getTerminator()->getSuccessor(0);

  // If ExitingBlock is a member of the loop basic blocks' DF list then
  // replace ExitingBlock with header and exit block in the DF list
  DominanceFrontier::iterator ExitingBlockDF = DF->find(ExitingBlock);
  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
       I != E; ++I) {
    BasicBlock *BB = *I;
    if (BB == Header || BB == ExitingBlock)
      continue;
    DominanceFrontier::iterator BBDF = DF->find(BB);
    DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin();
    DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end();
    while (DomSetI != DomSetE) {
      DominanceFrontier::DomSetType::iterator CurrentItr = DomSetI;
      ++DomSetI;
      BasicBlock *DFBB = *CurrentItr;
      if (DFBB == ExitingBlock) {
        BBDF->second.erase(DFBB);
        for (DominanceFrontier::DomSetType::iterator 
               EBI = ExitingBlockDF->second.begin(),
               EBE = ExitingBlockDF->second.end(); EBI != EBE; ++EBI) 
          BBDF->second.insert(*EBI);
      }
    }
  }
  NumRestrictBounds++;
  return true;
}
Esempio n. 2
0
/// Unroll the given loop by Count. The loop must be in LCSSA form. Returns true
/// if unrolling was succesful, or false if the loop was unmodified. Unrolling
/// can only fail when the loop's latch block is not terminated by a conditional
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// If a LoopPassManager is passed in, and the loop is fully removed, it will be
/// removed from the LoopPassManager as well. LPM can also be NULL.
bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) {
  assert(L->isLCSSAForm());

  BasicBlock *Header = L->getHeader();
  BasicBlock *LatchBlock = L->getLoopLatch();
  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
  
  if (!BI || BI->isUnconditional()) {
    // The loop-rotate pass can be helpful to avoid this in many cases.
    DOUT << "  Can't unroll; loop not terminated by a conditional branch.\n";
    return false;
  }

  // Find trip count
  unsigned TripCount = L->getSmallConstantTripCount();
  // Find trip multiple if count is not available
  unsigned TripMultiple = 1;
  if (TripCount == 0)
    TripMultiple = L->getSmallConstantTripMultiple();

  if (TripCount != 0)
    DOUT << "  Trip Count = " << TripCount << "\n";
  if (TripMultiple != 1)
    DOUT << "  Trip Multiple = " << TripMultiple << "\n";

  // Effectively "DCE" unrolled iterations that are beyond the tripcount
  // and will never be executed.
  if (TripCount != 0 && Count > TripCount)
    Count = TripCount;

  assert(Count > 0);
  assert(TripMultiple > 0);
  assert(TripCount == 0 || TripCount % TripMultiple == 0);

  // Are we eliminating the loop control altogether?
  bool CompletelyUnroll = Count == TripCount;

  // If we know the trip count, we know the multiple...
  unsigned BreakoutTrip = 0;
  if (TripCount != 0) {
    BreakoutTrip = TripCount % Count;
    TripMultiple = 0;
  } else {
    // Figure out what multiple to use.
    BreakoutTrip = TripMultiple =
      (unsigned)GreatestCommonDivisor64(Count, TripMultiple);
  }

  if (CompletelyUnroll) {
    DEBUG(errs() << "COMPLETELY UNROLLING loop %" << Header->getName()
          << " with trip count " << TripCount << "!\n");
  } else {
    DEBUG(errs() << "UNROLLING loop %" << Header->getName()
          << " by " << Count);
    if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
      DOUT << " with a breakout at trip " << BreakoutTrip;
    } else if (TripMultiple != 1) {
      DOUT << " with " << TripMultiple << " trips per branch";
    }
    DOUT << "!\n";
  }

  std::vector<BasicBlock*> LoopBlocks = L->getBlocks();

  bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);

  // For the first iteration of the loop, we should use the precloned values for
  // PHI nodes.  Insert associations now.
  typedef DenseMap<const Value*, Value*> ValueMapTy;
  ValueMapTy LastValueMap;
  std::vector<PHINode*> OrigPHINode;
  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
    PHINode *PN = cast<PHINode>(I);
    OrigPHINode.push_back(PN);
    if (Instruction *I = 
                dyn_cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock)))
      if (L->contains(I->getParent()))
        LastValueMap[I] = I;
  }

  std::vector<BasicBlock*> Headers;
  std::vector<BasicBlock*> Latches;
  Headers.push_back(Header);
  Latches.push_back(LatchBlock);

  for (unsigned It = 1; It != Count; ++It) {
    char SuffixBuffer[100];
    sprintf(SuffixBuffer, ".%d", It);
    
    std::vector<BasicBlock*> NewBlocks;
    
    for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(),
         E = LoopBlocks.end(); BB != E; ++BB) {
      ValueMapTy ValueMap;
      BasicBlock *New = CloneBasicBlock(*BB, ValueMap, SuffixBuffer);
      Header->getParent()->getBasicBlockList().push_back(New);

      // Loop over all of the PHI nodes in the block, changing them to use the
      // incoming values from the previous block.
      if (*BB == Header)
        for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
          PHINode *NewPHI = cast<PHINode>(ValueMap[OrigPHINode[i]]);
          Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
          if (Instruction *InValI = dyn_cast<Instruction>(InVal))
            if (It > 1 && L->contains(InValI->getParent()))
              InVal = LastValueMap[InValI];
          ValueMap[OrigPHINode[i]] = InVal;
          New->getInstList().erase(NewPHI);
        }

      // Update our running map of newest clones
      LastValueMap[*BB] = New;
      for (ValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end();
           VI != VE; ++VI)
        LastValueMap[VI->first] = VI->second;

      L->addBasicBlockToLoop(New, LI->getBase());

      // Add phi entries for newly created values to all exit blocks except
      // the successor of the latch block.  The successor of the exit block will
      // be updated specially after unrolling all the way.
      if (*BB != LatchBlock)
        for (Value::use_iterator UI = (*BB)->use_begin(), UE = (*BB)->use_end();
             UI != UE;) {
          Instruction *UseInst = cast<Instruction>(*UI);
          ++UI;
          if (isa<PHINode>(UseInst) && !L->contains(UseInst->getParent())) {
            PHINode *phi = cast<PHINode>(UseInst);
            Value *Incoming = phi->getIncomingValueForBlock(*BB);
            phi->addIncoming(Incoming, New);
          }
        }

      // Keep track of new headers and latches as we create them, so that
      // we can insert the proper branches later.
      if (*BB == Header)
        Headers.push_back(New);
      if (*BB == LatchBlock) {
        Latches.push_back(New);

        // Also, clear out the new latch's back edge so that it doesn't look
        // like a new loop, so that it's amenable to being merged with adjacent
        // blocks later on.
        TerminatorInst *Term = New->getTerminator();
        assert(L->contains(Term->getSuccessor(!ContinueOnTrue)));
        assert(Term->getSuccessor(ContinueOnTrue) == LoopExit);
        Term->setSuccessor(!ContinueOnTrue, NULL);
      }

      NewBlocks.push_back(New);
    }
    
    // Remap all instructions in the most recent iteration
    for (unsigned i = 0; i < NewBlocks.size(); ++i)
      for (BasicBlock::iterator I = NewBlocks[i]->begin(),
           E = NewBlocks[i]->end(); I != E; ++I)
        RemapInstruction(I, LastValueMap);
  }
  
  // The latch block exits the loop.  If there are any PHI nodes in the
  // successor blocks, update them to use the appropriate values computed as the
  // last iteration of the loop.
  if (Count != 1) {
    SmallPtrSet<PHINode*, 8> Users;
    for (Value::use_iterator UI = LatchBlock->use_begin(),
         UE = LatchBlock->use_end(); UI != UE; ++UI)
      if (PHINode *phi = dyn_cast<PHINode>(*UI))
        Users.insert(phi);
    
    BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]);
    for (SmallPtrSet<PHINode*,8>::iterator SI = Users.begin(), SE = Users.end();
         SI != SE; ++SI) {
      PHINode *PN = *SI;
      Value *InVal = PN->removeIncomingValue(LatchBlock, false);
      // If this value was defined in the loop, take the value defined by the
      // last iteration of the loop.
      if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
        if (L->contains(InValI->getParent()))
          InVal = LastValueMap[InVal];
      }
      PN->addIncoming(InVal, LastIterationBB);
    }
  }

  // Now, if we're doing complete unrolling, loop over the PHI nodes in the
  // original block, setting them to their incoming values.
  if (CompletelyUnroll) {
    BasicBlock *Preheader = L->getLoopPreheader();
    for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
      PHINode *PN = OrigPHINode[i];
      PN->replaceAllUsesWith(PN->getIncomingValueForBlock(Preheader));
      Header->getInstList().erase(PN);
    }
  }

  // Now that all the basic blocks for the unrolled iterations are in place,
  // set up the branches to connect them.
  for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
    // The original branch was replicated in each unrolled iteration.
    BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());

    // The branch destination.
    unsigned j = (i + 1) % e;
    BasicBlock *Dest = Headers[j];
    bool NeedConditional = true;

    // For a complete unroll, make the last iteration end with a branch
    // to the exit block.
    if (CompletelyUnroll && j == 0) {
      Dest = LoopExit;
      NeedConditional = false;
    }

    // If we know the trip count or a multiple of it, we can safely use an
    // unconditional branch for some iterations.
    if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
      NeedConditional = false;
    }

    if (NeedConditional) {
      // Update the conditional branch's successor for the following
      // iteration.
      Term->setSuccessor(!ContinueOnTrue, Dest);
    } else {
      Term->setUnconditionalDest(Dest);
      // Merge adjacent basic blocks, if possible.
      if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) {
        std::replace(Latches.begin(), Latches.end(), Dest, Fold);
        std::replace(Headers.begin(), Headers.end(), Dest, Fold);
      }
    }
  }
  
  // At this point, the code is well formed.  We now do a quick sweep over the
  // inserted code, doing constant propagation and dead code elimination as we
  // go.
  const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks();
  for (std::vector<BasicBlock*>::const_iterator BB = NewLoopBlocks.begin(),
       BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
    for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
      Instruction *Inst = I++;

      if (isInstructionTriviallyDead(Inst))
        (*BB)->getInstList().erase(Inst);
      else if (Constant *C = ConstantFoldInstruction(Inst, 
                                                     Header->getContext())) {
        Inst->replaceAllUsesWith(C);
        (*BB)->getInstList().erase(Inst);
      }
    }

  NumCompletelyUnrolled += CompletelyUnroll;
  ++NumUnrolled;
  // Remove the loop from the LoopPassManager if it's completely removed.
  if (CompletelyUnroll && LPM != NULL)
    LPM->deleteLoopFromQueue(L);

  // If we didn't completely unroll the loop, it should still be in LCSSA form.
  if (!CompletelyUnroll)
    assert(L->isLCSSAForm());

  return true;
}
Esempio n. 3
0
/// processOneIterationLoop -- Eliminate loop if loop body is executed 
/// only once. For example,
/// for (i = 0; i < N; ++i) {
///   if ( i == X) {
///     ...
///   }
/// }
///
bool LoopIndexSplit::processOneIterationLoop() {
  SplitCondition = NULL;
  BasicBlock *Latch = L->getLoopLatch();
  BasicBlock *Header = L->getHeader();
  BranchInst *BR = dyn_cast<BranchInst>(Header->getTerminator());
  if (!BR) return false;
  if (!isa<BranchInst>(Latch->getTerminator())) return false;
  if (BR->isUnconditional()) return false;
  SplitCondition = dyn_cast<ICmpInst>(BR->getCondition());
  if (!SplitCondition) return false;
  if (SplitCondition == ExitCondition) return false;
  if (SplitCondition->getPredicate() != ICmpInst::ICMP_EQ) return false;
  if (BR->getOperand(1) != Latch) return false;
  if (!IVBasedValues.count(SplitCondition->getOperand(0))
      && !IVBasedValues.count(SplitCondition->getOperand(1)))
    return false;

  // If IV is used outside the loop then this loop traversal is required.
  // FIXME: Calculate and use last IV value. 
  if (isUsedOutsideLoop(IVIncrement, L))
    return false;

  // If BR operands are not IV or not loop invariants then skip this loop.
  Value *OPV = SplitCondition->getOperand(0);
  Value *SplitValue = SplitCondition->getOperand(1);
  if (!L->isLoopInvariant(SplitValue))
    std::swap(OPV, SplitValue);
  if (!L->isLoopInvariant(SplitValue))
    return false;
  Instruction *OPI = dyn_cast<Instruction>(OPV);
  if (!OPI) 
    return false;
  if (OPI->getParent() != Header || isUsedOutsideLoop(OPI, L))
    return false;
  Value *StartValue = IVStartValue;
  Value *ExitValue = IVExitValue;;

  if (OPV != IndVar) {
    // If BR operand is IV based then use this operand to calculate
    // effective conditions for loop body.
    BinaryOperator *BOPV = dyn_cast<BinaryOperator>(OPV);
    if (!BOPV) 
      return false;
    if (BOPV->getOpcode() != Instruction::Add) 
      return false;
    StartValue = BinaryOperator::CreateAdd(OPV, StartValue, "" , BR);
    ExitValue = BinaryOperator::CreateAdd(OPV, ExitValue, "" , BR);
  }

  if (!cleanBlock(Header))
    return false;

  if (!cleanBlock(Latch))
    return false;
    
  // If the merge point for BR is not loop latch then skip this loop.
  if (BR->getSuccessor(0) != Latch) {
    DominanceFrontier::iterator DF0 = DF->find(BR->getSuccessor(0));
    assert (DF0 != DF->end() && "Unable to find dominance frontier");
    if (!DF0->second.count(Latch))
      return false;
  }
  
  if (BR->getSuccessor(1) != Latch) {
    DominanceFrontier::iterator DF1 = DF->find(BR->getSuccessor(1));
    assert (DF1 != DF->end() && "Unable to find dominance frontier");
    if (!DF1->second.count(Latch))
      return false;
  }
    
  // Now, Current loop L contains compare instruction
  // that compares induction variable, IndVar, against loop invariant. And
  // entire (i.e. meaningful) loop body is dominated by this compare
  // instruction. In such case eliminate 
  // loop structure surrounding this loop body. For example,
  //     for (int i = start; i < end; ++i) {
  //         if ( i == somevalue) {
  //           loop_body
  //         }
  //     }
  // can be transformed into
  //     if (somevalue >= start && somevalue < end) {
  //        i = somevalue;
  //        loop_body
  //     }

  // Replace index variable with split value in loop body. Loop body is executed
  // only when index variable is equal to split value.
  IndVar->replaceAllUsesWith(SplitValue);

  // Replace split condition in header.
  // Transform 
  //      SplitCondition : icmp eq i32 IndVar, SplitValue
  // into
  //      c1 = icmp uge i32 SplitValue, StartValue
  //      c2 = icmp ult i32 SplitValue, ExitValue
  //      and i32 c1, c2 
  Instruction *C1 = new ICmpInst(BR, ExitCondition->isSigned() ? 
                                 ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE,
                                 SplitValue, StartValue, "lisplit");

  CmpInst::Predicate C2P  = ExitCondition->getPredicate();
  BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
  if (LatchBR->getOperand(1) != Header)
    C2P = CmpInst::getInversePredicate(C2P);
  Instruction *C2 = new ICmpInst(BR, C2P, SplitValue, ExitValue, "lisplit");
  Instruction *NSplitCond = BinaryOperator::CreateAnd(C1, C2, "lisplit", BR);

  SplitCondition->replaceAllUsesWith(NSplitCond);
  SplitCondition->eraseFromParent();

  // Remove Latch to Header edge.
  BasicBlock *LatchSucc = NULL;
  Header->removePredecessor(Latch);
  for (succ_iterator SI = succ_begin(Latch), E = succ_end(Latch);
       SI != E; ++SI) {
    if (Header != *SI)
      LatchSucc = *SI;
  }

  // Clean up latch block.
  Value *LatchBRCond = LatchBR->getCondition();
  LatchBR->setUnconditionalDest(LatchSucc);
  RecursivelyDeleteTriviallyDeadInstructions(LatchBRCond);
  
  LPM->deleteLoopFromQueue(L);

  // Update Dominator Info.
  // Only CFG change done is to remove Latch to Header edge. This
  // does not change dominator tree because Latch did not dominate
  // Header.
  if (DF) {
    DominanceFrontier::iterator HeaderDF = DF->find(Header);
    if (HeaderDF != DF->end()) 
      DF->removeFromFrontier(HeaderDF, Header);

    DominanceFrontier::iterator LatchDF = DF->find(Latch);
    if (LatchDF != DF->end()) 
      DF->removeFromFrontier(LatchDF, Header);
  }

  ++NumIndexSplitRemoved;
  return true;
}
Esempio n. 4
0
//insert a basic block with appropriate code
//along a given edge
void insertBB(Edge ed,
	      getEdgeCode *edgeCode, 
	      Instruction *rInst, 
	      Value *countInst, 
	      int numPaths, int Methno, Value *threshold){

  BasicBlock* BB1=ed.getFirst()->getElement();
  BasicBlock* BB2=ed.getSecond()->getElement();
  
#ifdef DEBUG_PATH_PROFILES
  //debugging info
  cerr<<"Edges with codes ######################\n";
  cerr<<BB1->getName()<<"->"<<BB2->getName()<<"\n";
  cerr<<"########################\n";
#endif
  
  //We need to insert a BB between BB1 and BB2 
  TerminatorInst *TI=BB1->getTerminator();
  BasicBlock *newBB=new BasicBlock("counter", BB1->getParent());

  //get code for the new BB
  vector<Value *> retVec;

  edgeCode->getCode(rInst, countInst, BB1->getParent(), newBB, retVec);

  BranchInst *BI =  cast<BranchInst>(TI);

  //Is terminator a branch instruction?
  //then we need to change branch destinations to include new BB

  if(BI->isUnconditional()){
    BI->setUnconditionalDest(newBB);
  }
  else{
      if(BI->getSuccessor(0)==BB2)
      BI->setSuccessor(0, newBB);
    
    if(BI->getSuccessor(1)==BB2)
      BI->setSuccessor(1, newBB);
  }

  BasicBlock *triggerBB = NULL;
  if(retVec.size()>0){
    triggerBB = new BasicBlock("trigger", BB1->getParent());
    getTriggerCode(BB1->getParent()->getParent(), triggerBB, Methno, 
                   retVec[1], countInst, rInst);//retVec[0]);

    //Instruction *castInst = new CastInst(retVec[0], Type::IntTy, "");
    Instruction *etr = new LoadInst(threshold, "threshold");
    
    //std::cerr<<"type1: "<<etr->getType()<<" type2: "<<retVec[0]->getType()<<"\n"; 
    Instruction *cmpInst = new SetCondInst(Instruction::SetLE, etr, 
                                           retVec[0], "");
    Instruction *newBI2 = new BranchInst(triggerBB, BB2, cmpInst);
    //newBB->getInstList().push_back(castInst);
    newBB->getInstList().push_back(etr);
    newBB->getInstList().push_back(cmpInst);
    newBB->getInstList().push_back(newBI2);
    
    //triggerBB->getInstList().push_back(triggerInst);
    new BranchInst(BB2, 0, 0, triggerBB);
  }
  else{
    new BranchInst(BB2, 0, 0, newBB);
  }

  //now iterate over BB2, and set its Phi nodes right
  for(BasicBlock::iterator BB2Inst = BB2->begin(), BBend = BB2->end(); 
      BB2Inst != BBend; ++BB2Inst){
   
    if(PHINode *phiInst=dyn_cast<PHINode>(BB2Inst)){
      int bbIndex=phiInst->getBasicBlockIndex(BB1);
      assert(bbIndex>=0);
      phiInst->setIncomingBlock(bbIndex, newBB);

      ///check if trigger!=null, then add value corresponding to it too!
      if(retVec.size()>0){
        assert(triggerBB && "BasicBlock with trigger should not be null!");
        Value *vl = phiInst->getIncomingValue((unsigned int)bbIndex);
        phiInst->addIncoming(vl, triggerBB);
      }
    }
  }
}