/*
FindRoots()
  for each instruction I = 'R <- op, Ra, Rb'
    if op(I) not associative or commutative
       continue
    // I is a root unless R is a temporary
    //     (temporaries are only used once and by an instruction with the same operator)
    if NumUses(R) > 1 or op(Use(R)) != op(I)
       mark I as root, processed(root) = false
  order roots such that precedence of op(r_i) <= precedence of op(r_{i+1})
  while roots not empty
    I = 'R <- op, Ra, Rb' = Def(Pop(root))
    BalanceTree(I)
*/
// Collects every commutative and associative binary operator in f that is the
// root of an expression tree, sorts the roots with precedence_less_than, and
// runs balanceTree() on each of them in that order.
//
// Returns true when at least one call to balanceTree() produced a replacement
// instruction.
bool findRoots(Function* f)
{
  bool changed = false;
  assert(f);
  std::vector<BinaryOperator*> roots;

  for(Function::iterator BB = f->begin(); BB != f->end(); ++BB)
  {
    for(BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
    {
      BinaryOperator* BO = dynamic_cast<BinaryOperator*>(&*II);
      if( BO and isCommutativeOperation(BO) and isAssociativeOperation(BO) )
      {
        if( getRealNumUses(BO) > 1 )
        {
          roots.push_back(BO);
          INTERNAL_MESSAGE("Root " << BO->getName() << " added for numUses > 1.\n");
        }
        else
        {
          for(Value::use_iterator UI = BO->use_begin(); UI != BO->use_end(); ++UI)
          {
            if( isDifferentOperation(BO, *UI)  )
            {
              roots.push_back(BO);
              INTERNAL_MESSAGE("Root " << BO->getName() << " added because it is different operation than " << (*UI)->getName() << "\n");
              // Record each instruction at most once. balanceTree() removes a
              // single entry per root, so a duplicate would be left behind
              // pointing at an erased instruction.
              break;
            }
          }
        }
      }
    }
  }
  std::sort(roots.begin(), roots.end(), precedence_less_than);
  // Work from a private copy of the sorted roots: balanceTree() edits `roots`
  // (erase + push_back) while it runs.
  std::list<BinaryOperator*> root_queue(roots.begin(), roots.end());
  std::map<Instruction*,bool> visitMap;
  int roots_balanced = 0;
  while( !root_queue.empty() )
  {
    BinaryOperator* BO = root_queue.front();
    root_queue.pop_front();
    // balanceTree() returns the replacement instruction, or NULL when the root
    // had already been visited; only the non-NULL case counts as a change.
    bool root_changed = balanceTree(BO, visitMap, roots);
    if( root_changed )
      ++roots_balanced;
    changed = root_changed or changed;
  }
  // Summarise the run; `roots` now reflects whatever balanceTree() left in it.
  std::stringstream ss;
  ss << "Attempted to balance " << roots.size() << " roots (";
  for(std::vector<BinaryOperator*>::iterator RI = roots.begin(); RI != roots.end(); ++RI)
  {
    if( RI != roots.begin() )
      ss << ", ";
    ss << getValueName((*RI));
  }
  ss << "), " << roots_balanced << " needed balancing.\n";
  LOG_MESSAGE1("Balancing", ss.str());
  return changed;
}
// Example #2
// 0
/// HandleFloatingPointIV - If the loop has floating induction variable
/// then insert corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
///
/// The rewrite is only performed when the start value, the stride and the
/// exit value are all constants representable as signed 32-bit integers, the
/// increment is an fadd of the PHI with a constant, the increment's only
/// users are the PHI and an fcmp feeding a loop-exiting conditional branch,
/// and the resulting integer IV cannot wrap before the loop exits. In every
/// other case the function returns without touching the IR.
///
/// \param L   loop that contains the candidate induction variable.
/// \param PN  two-entry PHI node of the floating-point induction variable.
void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
  // If incoming block 0 is inside the loop it is the back edge, so the
  // loop-entry edge is index 1 (and vice versa).
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  ConstantFP *InitValueVal =
    dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return;

  // Check IV increment. Reject this PN if increment operation is not
  // an add or increment value can not be represented by an integer.
  BinaryOperator *Incr =
    dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;

  // If this is not an add of the PHI with a constantfp, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::use_iterator IncrUse = Incr->use_begin();
  Instruction *U1 = cast<Instruction>(IncrUse++);
  if (IncrUse == Incr->use_end()) return;
  Instruction *U2 = cast<Instruction>(IncrUse++);
  if (IncrUse != Incr->use_end()) return;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (Compare == 0 || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->use_back()))
    return;

  BranchInst *TheBr = cast<BranchInst>(Compare->use_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return;


  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == 0 ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value and an equality or less than comparison.
    if (InitValue >= ExitValue ||
        NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
      return;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    if (NewPred == CmpInst::ICMP_SLE) {
      // Normalize SLE -> SLT, check for infinite loop.
      if (++Range == 0) return;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return;

  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value and an equality or greater than comparison.
    // Reject when init <= exit: such a loop does not count down towards the
    // exit value, and InitValue-ExitValue below would not be a valid range.
    if (InitValue <= ExitValue ||
        NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
      return;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    if (NewPred == CmpInst::ICMP_SGE) {
      // Normalize SGE -> SGT, check for infinite loop.
      if (++Range == 0) return;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return;
  }

  const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakVH to observe whether this happens.
  WeakVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 PN->getParent()->getFirstNonPHI());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN);
  }

  // Add a new IVUsers entry for the newly-created integer PHI.
  IU->AddUsersIfInteresting(NewPHI);
}
/// Rewrites the setjmp calls and the possibly-longjmp-ing calls inside \p F.
///
/// In order, the function:
///  - creates the initial `setjmpTableSize` and `setjmpTable` values in the
///    entry block and stores 0 into the table;
///  - for every direct call to `setjmp` located in \p F, splits the block
///    after the call, adds a "setjmp.ret" PHI to the tail that replaces the
///    call's result, and emits a call to SaveSetjmpF plus a load of TempRet0GV;
///  - for every call for which canLongjmp() is true, splits the block and
///    ends it with a switch on the label produced by wrapTestSetjmp() that
///    dispatches to the matching setjmp tail;
///  - erases the replaced instructions, frees `setjmpTable` before each
///    return, rewrites the table/size uses outside the entry block through
///    SSAUpdater, and finally calls rebuildSSA().
///
/// NOTE(review): the result of M.getFunction("setjmp") is dereferenced without
/// a null check; presumably callers only invoke this when `setjmp` exists in
/// the module - confirm against the caller.
///
/// \returns always true.
bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
  Module &M = *F.getParent();
  LLVMContext &C = F.getContext();
  IRBuilder<> IRB(C);
  SmallVector<Instruction *, 64> ToErase;
  // Vector of %setjmpTable values
  std::vector<Instruction *> SetjmpTableInsts;
  // Vector of %setjmpTableSize values
  std::vector<Instruction *> SetjmpTableSizeInsts;

  // Setjmp preparation

  // This instruction effectively means %setjmpTableSize = 4.
  // We create this as an instruction intentionally, and we don't want to fold
  // this instruction to a constant 4, because this value will be used in
  // SSAUpdater.AddAvailableValue(...) later.
  BasicBlock &EntryBB = F.getEntryBlock();
  BinaryOperator *SetjmpTableSize = BinaryOperator::Create(
      Instruction::Add, IRB.getInt32(4), IRB.getInt32(0), "setjmpTableSize",
      &*EntryBB.getFirstInsertionPt());
  // setjmpTable = (int *) malloc(40);
  Instruction *SetjmpTable = CallInst::CreateMalloc(
      SetjmpTableSize, IRB.getInt32Ty(), IRB.getInt32Ty(), IRB.getInt32(40),
      nullptr, nullptr, "setjmpTable");
  // setjmpTable[0] = 0;
  IRB.SetInsertPoint(SetjmpTableSize);
  IRB.CreateStore(IRB.getInt32(0), SetjmpTable);
  SetjmpTableInsts.push_back(SetjmpTable);
  SetjmpTableSizeInsts.push_back(SetjmpTableSize);

  // Setjmp transformation
  std::vector<PHINode *> SetjmpRetPHIs;
  Function *SetjmpF = M.getFunction("setjmp");
  for (User *U : SetjmpF->users()) {
    auto *CI = dyn_cast<CallInst>(U);
    if (!CI)
      report_fatal_error("Does not support indirect calls to setjmp");

    BasicBlock *BB = CI->getParent();
    if (BB->getParent() != &F) // in other function
      continue;

    // The tail is everything right after the call, and will be reached once
    // when setjmp is called, and later when longjmp returns to the setjmp
    BasicBlock *Tail = SplitBlock(BB, CI->getNextNode());
    // Add a phi to the tail, which will be the output of setjmp, which
    // indicates if this is the first call or a longjmp back. The phi directly
    // uses the right value based on where we arrive from
    IRB.SetInsertPoint(Tail->getFirstNonPHI());
    PHINode *SetjmpRet = IRB.CreatePHI(IRB.getInt32Ty(), 2, "setjmp.ret");

    // setjmp initial call returns 0
    SetjmpRet->addIncoming(IRB.getInt32(0), BB);
    // The proper output is now this, not the setjmp call itself
    CI->replaceAllUsesWith(SetjmpRet);
    // longjmp returns to the setjmp will add themselves to this phi
    SetjmpRetPHIs.push_back(SetjmpRet);

    // Fix call target
    // Our index in the function is our place in the array + 1 to avoid index
    // 0, because index 0 means the longjmp is not ours to handle.
    IRB.SetInsertPoint(CI);
    Value *Args[] = {CI->getArgOperand(0), IRB.getInt32(SetjmpRetPHIs.size()),
                     SetjmpTable, SetjmpTableSize};
    Instruction *NewSetjmpTable =
        IRB.CreateCall(SaveSetjmpF, Args, "setjmpTable");
    Instruction *NewSetjmpTableSize =
        IRB.CreateLoad(TempRet0GV, "setjmpTableSize");
    SetjmpTableInsts.push_back(NewSetjmpTable);
    SetjmpTableSizeInsts.push_back(NewSetjmpTableSize);
    ToErase.push_back(CI);
  }

  // Update each call that can longjmp so it can return to a setjmp where
  // relevant.

  // Because we are creating new BBs while processing and don't want to make
  // all these newly created BBs candidates again for longjmp processing, we
  // first make the vector of candidate BBs.
  std::vector<BasicBlock *> BBs;
  for (BasicBlock &BB : F)
    BBs.push_back(&BB);

  // BBs.size() will change within the loop, so we query it every time
  for (unsigned i = 0; i < BBs.size(); i++) {
    BasicBlock *BB = BBs[i];
    for (Instruction &I : *BB) {
      assert(!isa<InvokeInst>(&I));
      auto *CI = dyn_cast<CallInst>(&I);
      if (!CI)
        continue;

      const Value *Callee = CI->getCalledValue();
      if (!canLongjmp(M, Callee))
        continue;

      Value *Threw = nullptr;
      BasicBlock *Tail;
      if (Callee->getName().startswith(InvokePrefix)) {
        // If invoke wrapper has already been generated for this call in
        // previous EH phase, search for the load instruction
        // %__THREW__.val = __THREW__;
        // in postamble after the invoke wrapper call
        LoadInst *ThrewLI = nullptr;
        StoreInst *ThrewResetSI = nullptr;
        for (auto I = std::next(BasicBlock::iterator(CI)), IE = BB->end();
             I != IE; ++I) {
          if (auto *LI = dyn_cast<LoadInst>(I))
            if (auto *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand()))
              if (GV == ThrewGV) {
                Threw = ThrewLI = LI;
                break;
              }
        }
        // Check that the load was found before it is used as the starting
        // point of the next search; otherwise a null ThrewLI would be
        // dereferenced before the assertion could report it.
        assert(Threw && ThrewLI && "Cannot find __THREW__ load after invoke");
        // Search for the store instruction after the load above
        // __THREW__ = 0;
        for (auto I = std::next(BasicBlock::iterator(ThrewLI)), IE = BB->end();
             I != IE; ++I) {
          if (auto *SI = dyn_cast<StoreInst>(I))
            if (auto *GV = dyn_cast<GlobalVariable>(SI->getPointerOperand()))
              if (GV == ThrewGV && SI->getValueOperand() == IRB.getInt32(0)) {
                ThrewResetSI = SI;
                break;
              }
        }
        assert(ThrewResetSI && "Cannot find __THREW__ store after invoke");
        Tail = SplitBlock(BB, ThrewResetSI->getNextNode());

      } else {
        // Wrap call with invoke wrapper and generate preamble/postamble
        Threw = wrapInvoke(CI);
        ToErase.push_back(CI);
        Tail = SplitBlock(BB, CI->getNextNode());
      }

      // We need to replace the terminator in Tail - SplitBlock makes BB go
      // straight to Tail, we need to check if a longjmp occurred, and go to the
      // right setjmp-tail if so
      ToErase.push_back(BB->getTerminator());

      // Generate a function call to testSetjmp function and preamble/postamble
      // code to figure out (1) whether longjmp occurred (2) if longjmp
      // occurred, which setjmp it corresponds to
      Value *Label = nullptr;
      Value *LongjmpResult = nullptr;
      BasicBlock *EndBB = nullptr;
      wrapTestSetjmp(BB, CI, Threw, SetjmpTable, SetjmpTableSize, Label,
                     LongjmpResult, EndBB);
      assert(Label && LongjmpResult && EndBB);

      // Create switch instruction
      IRB.SetInsertPoint(EndBB);
      SwitchInst *SI = IRB.CreateSwitch(Label, Tail, SetjmpRetPHIs.size());
      // -1 means no longjmp happened, continue normally (will hit the default
      // switch case). 0 means a longjmp that is not ours to handle, needs a
      // rethrow. Otherwise the index is the same as the index in P+1 (to avoid
      // 0).
      for (unsigned i = 0; i < SetjmpRetPHIs.size(); i++) {
        SI->addCase(IRB.getInt32(i + 1), SetjmpRetPHIs[i]->getParent());
        SetjmpRetPHIs[i]->addIncoming(LongjmpResult, EndBB);
      }

      // We are splitting the block here, and must continue to find other calls
      // in the block - which is now split. so continue to traverse in the Tail
      BBs.push_back(Tail);
    }
  }

  // Erase everything we no longer need in this function
  for (Instruction *I : ToErase)
    I->eraseFromParent();

  // Free setjmpTable buffer before each return instruction
  for (BasicBlock &BB : F) {
    TerminatorInst *TI = BB.getTerminator();
    if (isa<ReturnInst>(TI))
      CallInst::CreateFree(SetjmpTable, TI);
  }

  // Every call to saveSetjmp can change setjmpTable and setjmpTableSize
  // (when buffer reallocation occurs)
  // entry:
  //   setjmpTableSize = 4;
  //   setjmpTable = (int *) malloc(40);
  //   setjmpTable[0] = 0;
  // ...
  // somebb:
  //   setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize);
  //   setjmpTableSize = __tempRet0;
  // So we need to make sure the SSA for these variables is valid so that every
  // saveSetjmp and testSetjmp calls have the correct arguments.
  SSAUpdater SetjmpTableSSA;
  SSAUpdater SetjmpTableSizeSSA;
  SetjmpTableSSA.Initialize(Type::getInt32PtrTy(C), "setjmpTable");
  SetjmpTableSizeSSA.Initialize(Type::getInt32Ty(C), "setjmpTableSize");
  for (Instruction *I : SetjmpTableInsts)
    SetjmpTableSSA.AddAvailableValue(I->getParent(), I);
  for (Instruction *I : SetjmpTableSizeInsts)
    SetjmpTableSizeSSA.AddAvailableValue(I->getParent(), I);

  for (auto UI = SetjmpTable->use_begin(), UE = SetjmpTable->use_end();
       UI != UE;) {
    // Grab the use before incrementing the iterator.
    Use &U = *UI;
    // Increment the iterator before removing the use from the list.
    ++UI;
    if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
      if (I->getParent() != &EntryBB)
        SetjmpTableSSA.RewriteUse(U);
  }
  for (auto UI = SetjmpTableSize->use_begin(), UE = SetjmpTableSize->use_end();
       UI != UE;) {
    // Same pattern as above: advance the iterator before the use is rewritten.
    Use &U = *UI;
    ++UI;
    if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
      if (I->getParent() != &EntryBB)
        SetjmpTableSizeSSA.RewriteUse(U);
  }

  // Finally, our modifications to the cfg can break dominance of SSA variables.
  // For example, in this code,
  // if (x()) { .. setjmp() .. }
  // if (y()) { .. longjmp() .. }
  // We must split the longjmp block, and it can jump into the block split
  // from the setjmp one. But that means that when we split the setjmp block,
  // its first part no longer dominates its second part - there is a
  // theoretically possible control flow path where x() is false, then y() is
  // true and we reach the second part of the setjmp block, without ever
  // reaching the first part. So, we rebuild SSA form here.
  rebuildSSA(F);
  return true;
}
/*
BalanceTree(root I)
  worklist: set
  leaves: vector
  mark I visited
  Push(worklist, Ra, Rb)
  // find all the leaves of the tree rooted at I
  while worklist not empty
    // look backwards following def-use from use
    T = 'R1 <- op1, Ra1, Rb1' = Def(Pop(worklist))
    if T is a root
      // balance computes weight in this case
      if T not visited
         BalanceTree(T)
      SortedInsert(leaves, T, Weight(T))
    else if op(T) == op(I)
      // add uses to worklist
      Push(worklist, Ra1, Rb1)
*/
// Rebuilds the expression tree rooted at `root` from its leaves, always
// combining the two leaves that sort first under weight_less_than.
//
//   root     - root instruction of the tree; must not be NULL.
//   visitMap - roots already processed; `root` and its replacement are added.
//   roots    - list of all known roots; `root` is replaced in it by the newly
//              created top-level instruction.
//
// Returns the instruction that replaced `root`, or NULL when `root` had already
// been visited (nothing is changed in that case).
BinaryOperator* balanceTree(BinaryOperator* root, std::map<Instruction*,bool>& visitMap, std::vector<BinaryOperator*>& roots)
{
  assert(root);
  if(visitMap[root])
    return NULL;
  // The worklist stores (user, operand index) rather than the operand value:
  // a recursive balanceTree() call may replace and erase an operand that is
  // still queued (e.g. both operands are the same root), so the operand has to
  // be read at the time it is processed.
  std::list<std::pair<Instruction*,unsigned> > worklist;
  // A multiset, not a set: the same value may legitimately appear as a leaf
  // more than once (x op x) and every occurrence must be kept.
  std::multiset<std::pair<int,Value*>,weight_less_than> leaves;
  visitMap[root] = true;
  worklist.push_back( std::pair<Instruction*,unsigned>(root, 0) );
  worklist.push_back( std::pair<Instruction*,unsigned>(root, 1) );
  while( !worklist.empty() )
  {
    std::pair<Instruction*,unsigned> pending = worklist.front();
    worklist.pop_front();
    Value* v = pending.first->getOperand(pending.second);
    assert(v);
    BinaryOperator* T = dynamic_cast<BinaryOperator*>(v);
    if( T and std::find(roots.begin(), roots.end(), T) != roots.end() ) // T is a binary operator that exists in the root list
    {
      if( !visitMap[T] ) //if we havent visited it, replace it with its balanced version
      {
        T = balanceTree(T, visitMap, roots);
      }
      if( !T )
      {
        INTERNAL_ERROR("balanceTree(" << *root << ") failed while attempting to balance leaf node " << *v << "; balance returned NULL!\n");
      }
      assert( T and "Balancing operation that was a root resulted in NULL being returned from balance function!" );
      leaves.insert(std::pair<int,Value*>(calculateWeight(T, roots), T));
    }
    else if( T and !isDifferentOperation(T, root) ) //if T isnt a root, and isnt a different operation than our root, we need to process it
    {
      worklist.push_back( std::pair<Instruction*,unsigned>(T, 0) );
      worklist.push_back( std::pair<Instruction*,unsigned>(T, 1) );
      //remove all of the signed, name, and size call uses
      for(Value::use_iterator UI = T->use_begin(); UI != T->use_end();)
      {
        CallInst* CI = dynamic_cast<CallInst*>(*UI);
        if( isROCCCFunctionCall(CI, ROCCCNames::VariableName) or
            isROCCCFunctionCall(CI, ROCCCNames::VariableSize) or
            isROCCCFunctionCall(CI, ROCCCNames::VariableSigned) )
        {
          // Erasing the call changes T's use list, so restart the scan.
          CI->eraseFromParent();
          UI = T->use_begin();
        }
        else
          ++UI;
      }
    }
    else //T isnt a BinaryOperator, or isn't a root, or is a different operation than our root - just add it as a single leaf
    {
      leaves.insert(std::pair<int,Value*>(1, v));
    }
  }
  /*
  // construct a balanced tree from leaves
  while size(leaves) > 1
    Ra1 = Dequeue(leaves)
    Rb1 = Dequeue(leaves)
    T = 'R1 <- op1, Ra1, Rb1'
    insert T before I
    Weight(R1) = Weight(Ra1) + Weight(Rb1)
    SortedInsert(leaves, R1, Weight(R1))
  */
  while( leaves.size() > 1 )
  {
    std::pair<int,Value*> Ra1 = *leaves.begin();
    leaves.erase(leaves.begin());
    std::pair<int,Value*> Rb1 = *leaves.begin();
    leaves.erase(leaves.begin());
    int weight = Ra1.first + Rb1.first;
    //workaround to create a binary instruction with different operand types; create with undefs, then replace
    BinaryOperator* T = BinaryOperator::create(root->getOpcode(), UndefValue::get(root->getType()), UndefValue::get(root->getType()), "tmp", root);
    T->setOperand(0, Ra1.second);
    T->setOperand(1, Rb1.second);
    setSizeInBits(T, getSizeInBits(root));
    setValueSigned(T, isValueSigned(root));
    leaves.insert(std::pair<int,Value*>(weight, T));
  }
  BinaryOperator* last_inserted = NULL;
  if(leaves.begin() != leaves.end())
    last_inserted = dynamic_cast<BinaryOperator*>(leaves.begin()->second);
  if( last_inserted )
  {
    // Swap the root for its replacement in the root list while `root` is still
    // a live instruction, and tolerate a root that is not in the list.
    std::vector<BinaryOperator*>::iterator old_pos = std::find(roots.begin(), roots.end(), root);
    if( old_pos != roots.end() )
      roots.erase(old_pos);
    roots.push_back(last_inserted);
    visitMap[last_inserted] = true;

    setValueName(last_inserted, getValueName(root));
    root->uncheckedReplaceAllUsesWith(last_inserted);
    // Take the name before the root is erased, then hand it to the replacement.
    std::string name = root->getName();
    root->eraseFromParent();
    last_inserted->setName(name);
  }
  return last_inserted;
}
/// HandleFloatingPointIV - Try to replace a floating-point induction variable
/// by a 32-bit integer one.  For example,
///   for(double i = 0; i < 10000; ++i)
///     bar(i)
/// becomes
///   for(int i = 0; i < 10000; ++i)
///     bar((double)i);
///
/// The PHI is left untouched unless its start value, its constant fadd step
/// and the constant operand of the exit fcmp can all be converted by
/// convertToInt(), and the increment has exactly two users, one of which is
/// an fcmp.
///
/// NOTE(review): ordered/unordered relational predicates are mapped to the
/// *unsigned* integer predicates and no range/overflow check is made here;
/// verify that negative start/exit values are handled as intended.
void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
  // Bit width of i32; used as the initial content of every integer that is
  // handed to convertToInt() below.
  const uint64_t Int32Bits =
      Type::getInt32Ty(PH->getContext())->getPrimitiveSizeInBits();

  // If incoming block 0 lies inside the loop, the entry edge is index 1.
  const unsigned IncomingEdge = L->contains(PH->getIncomingBlock(0));
  const unsigned BackEdge = IncomingEdge ^ 1;

  // The start value must be a floating-point constant convertible to integer.
  ConstantFP *InitValue =
      dyn_cast<ConstantFP>(PH->getIncomingValue(IncomingEdge));
  if (!InitValue)
    return;
  uint64_t newInitValue = Int32Bits;
  if (!convertToInt(InitValue->getValueAPF(), &newInitValue))
    return;

  // The back-edge value must be an fadd whose other operand is a constant
  // that is convertible to integer as well.
  BinaryOperator *Incr =
      dyn_cast<BinaryOperator>(PH->getIncomingValue(BackEdge));
  if (!Incr || Incr->getOpcode() != Instruction::FAdd)
    return;
  const unsigned StepIdx = (Incr->getOperand(1) == PH) ? 0 : 1;
  ConstantFP *StepFP = dyn_cast<ConstantFP>(Incr->getOperand(StepIdx));
  if (!StepFP)
    return;
  uint64_t newIncrValue = Int32Bits;
  if (!convertToInt(StepFP->getValueAPF(), &newIncrValue))
    return;

  // The increment must have exactly two users: the PHI itself and the exit
  // condition.
  Value::use_iterator IncrUse = Incr->use_begin();
  Instruction *FirstUser = cast<Instruction>(IncrUse++);
  if (IncrUse == Incr->use_end())
    return;
  Instruction *SecondUser = cast<Instruction>(IncrUse++);
  if (IncrUse != Incr->use_end())
    return;

  // Whichever of the two users is an fcmp is taken as the exit condition.
  FCmpInst *EC = dyn_cast<FCmpInst>(FirstUser);
  if (!EC) {
    EC = dyn_cast<FCmpInst>(SecondUser);
    if (!EC)
      return;
  }

  // When the compare's block ends in a branch, that branch has to be
  // conditional on this very compare.
  if (BranchInst *BI = dyn_cast<BranchInst>(EC->getParent()->getTerminator())) {
    if (!BI->isConditional() || BI->getCondition() != EC)
      return;
  }

  // The compare's other operand is the exit value; it must be a constant that
  // converts to integer.
  const unsigned EVIndex = (EC->getOperand(1) == Incr) ? 0 : 1;
  ConstantFP *EV = dyn_cast<ConstantFP>(EC->getOperand(EVIndex));
  if (!EV)
    return;
  uint64_t intEV = Int32Bits;
  if (!convertToInt(EV->getValueAPF(), &intEV))
    return;

  // Translate the floating-point predicate; anything not listed is rejected.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (EC->getPredicate()) {
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ;  break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_UGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_UGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_ULT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_ULE; break;
  default:
    return;
  }

  // Build the integer PHI next to the old one; the start value comes in on
  // the loop-entry edge.
  PHINode *NewPHI = PHINode::Create(Type::getInt32Ty(PH->getContext()),
                                    PH->getName()+".int", PH);
  NewPHI->addIncoming(
      ConstantInt::get(Type::getInt32Ty(PH->getContext()), newInitValue),
      PH->getIncomingBlock(IncomingEdge));

  // Integer increment, placed right before the floating-point one.
  Value *NewAdd = BinaryOperator::CreateAdd(
      NewPHI,
      ConstantInt::get(Type::getInt32Ty(PH->getContext()), newIncrValue),
      Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PH->getIncomingBlock(BackEdge));

  // The back edge is edge 1 of NewPHI, whatever it was in the original PHI.
  // Keep the operand order of the original compare.
  ConstantInt *NewEV =
      ConstantInt::get(Type::getInt32Ty(PH->getContext()), intEV);
  Value *BackEdgeValue = NewPHI->getIncomingValue(1);
  Value *LHS = NewEV;
  Value *RHS = BackEdgeValue;
  if (EVIndex == 1) {
    LHS = BackEdgeValue;
    RHS = NewEV;
  }
  ICmpInst *NewEC = new ICmpInst(EC->getParent()->getTerminator(),
                                 NewPred, LHS, RHS, EC->getName());

  // The deletions below may make PH dead and delete it; the WeakVH lets us
  // find out whether that happened.
  WeakVH WeakPH = PH;

  // Retire the floating-point exit comparison.
  NewEC->takeName(EC);
  EC->replaceAllUsesWith(NewEC);
  RecursivelyDeleteTriviallyDeadInstructions(EC);

  // Retire the floating-point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr);

  // If the old PHI survived and still has users, feed them an int->fp
  // conversion of the new PHI.  useSIToFPInst() picks between the signed and
  // the unsigned conversion.
  if (WeakPH && !PH->use_empty()) {
    Value *Conv;
    if (useSIToFPInst(*InitValue, *EV, newInitValue, intEV))
      Conv = new SIToFPInst(NewPHI, PH->getType(), "indvar.conv",
                            PH->getParent()->getFirstNonPHI());
    else
      Conv = new UIToFPInst(NewPHI, PH->getType(), "indvar.conv",
                            PH->getParent()->getFirstNonPHI());
    PH->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PH);
  }

  // Register the new integer PHI with IVUsers.
  IU->AddUsersIfInteresting(NewPHI);
}