Example #1
// Delete the instructions that we scalarized.  If a full vector result
// is still needed, recreate it using InsertElements.
bool Scalarizer::finish() {
  // The presence of data in Gathered or Scattered indicates changes
  // made to the Function.
  if (Gathered.empty() && Scattered.empty())
    return false;
  for (const auto &GMI : Gathered) {
    Instruction *Op = GMI.first;
    ValueVector &CV = *GMI.second;
    if (!Op->use_empty()) {
      // The value is still needed, so recreate it using a series of
      // InsertElements.
      Type *Ty = Op->getType();
      Value *Res = UndefValue::get(Ty);
      BasicBlock *BB = Op->getParent();
      unsigned Count = Ty->getVectorNumElements();
      IRBuilder<> Builder(Op);
      if (isa<PHINode>(Op))
        Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
      for (unsigned I = 0; I < Count; ++I)
        Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
                                          Op->getName() + ".upto" + Twine(I));
      Res->takeName(Op);
      Op->replaceAllUsesWith(Res);
    }
    Op->eraseFromParent();
  }
  Gathered.clear();
  Scattered.clear();
  return true;
}
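
A quick aside on the shape of the rebuilt value: the loop above starts from an undef vector and emits one InsertElement per lane, producing names like %x.upto0 ... %x.upto3 before takeName() transfers the original name. A minimal plain-C++ sketch of the same lane-by-lane reassembly, with illustrative names and no LLVM dependency:

#include <array>
#include <cstdio>

// Reassemble a 4-lane vector from its scalarized lanes, mirroring the
// undef -> insertelement chain built by Scalarizer::finish above.
std::array<float, 4> reassemble(const float Lanes[4]) {
  std::array<float, 4> Res{}; // stands in for the initial UndefValue
  for (unsigned I = 0; I < 4; ++I)
    Res[I] = Lanes[I]; // one CreateInsertElement per lane
  return Res;
}

int main() {
  float Lanes[4] = {1, 2, 3, 4};
  std::array<float, 4> V = reassemble(Lanes);
  std::printf("%g %g %g %g\n", V[0], V[1], V[2], V[3]);
}
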
Example #2
// Delete the instructions that we scalarized.  If a full vector result
// is still needed, recreate it using InsertElements.
bool Scalarizer::finish() {
  if (Gathered.empty())
    return false;
  for (GatherList::iterator GMI = Gathered.begin(), GME = Gathered.end();
       GMI != GME; ++GMI) {
    Instruction *Op = GMI->first;
    ValueVector &CV = *GMI->second;
    if (!Op->use_empty()) {
      // The value is still needed, so recreate it using a series of
      // InsertElements.
      Type *Ty = Op->getType();
      Value *Res = UndefValue::get(Ty);
      unsigned Count = Ty->getVectorNumElements();
      IRBuilder<> Builder(Op->getParent(), Op);
      for (unsigned I = 0; I < Count; ++I)
        Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
                                          Op->getName() + ".upto" + Twine(I));
      Res->takeName(Op);
      Op->replaceAllUsesWith(Res);
    }
    Op->eraseFromParent();
  }
  Gathered.clear();
  Scattered.clear();
  return true;
}
Example #3
bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
  InsertAutomaticGCRoots(F, S);
  // FIXME: Turn this back on after fixing gcregroot in SelectionDAG.
  //InsertGCRegisterRoots(F, S);

  bool LowerWr = !S.customWriteBarrier();
  bool LowerRd = !S.customReadBarrier();
  bool InitRoots = S.initializeRoots();

  SmallVector<Instruction *, 32> Roots;

  bool MadeChange = false;
  for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE; ++BB) {
    for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
      if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) {
        Function *Callee = CI->getCalledFunction();
        switch (Callee->getIntrinsicID()) {
        case Intrinsic::gcwrite:
          if (LowerWr) {
            // Replace a write barrier with a simple store.
            Value *St = new StoreInst(CI->getArgOperand(0),
                                      CI->getArgOperand(2), CI);
            CI->replaceAllUsesWith(St);
            CI->eraseFromParent();
          }
          break;
        case Intrinsic::gcread:
          if (LowerRd) {
            // Replace a read barrier with a simple load.
            Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
            Ld->takeName(CI);
            CI->replaceAllUsesWith(Ld);
            CI->eraseFromParent();
          }
          break;
        case Intrinsic::gcroot:
          if (InitRoots) {
            // Initialize the GC root, but do not delete the intrinsic. The
            // backend needs the intrinsic to flag the stack slot.
            Value *V = CI->getArgOperand(0)->stripPointerCasts();
            Roots.push_back(cast<Instruction>(V));
          }
          break;
        default:
          continue;
        }

        MadeChange = true;
      }
    }
  }

  if (!Roots.empty())
    MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());

  return MadeChange;
}
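
When the strategy requests no custom barriers, the lowering above degenerates gcwrite into a plain store of the value (operand 0) into the slot (operand 2), and gcread into a plain load. A hedged C++ analog of the lowered semantics (function names are hypothetical):

#include <cstdio>

// What Intrinsic::gcwrite lowers to without a custom write barrier.
static void loweredGcWrite(void *Value, void **Slot) { *Slot = Value; }

// What Intrinsic::gcread lowers to without a custom read barrier.
static void *loweredGcRead(void **Slot) { return *Slot; }

int main() {
  void *Field = nullptr;
  int X = 42;
  loweredGcWrite(&X, &Field);
  std::printf("%d\n", *static_cast<int *>(loweredGcRead(&Field)));
}
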
void FuncRewriter::expandFunc() {
  Type *I32 = Type::getInt32Ty(Func->getContext());

  // We need to do two passes: When we process an invoke we need to
  // look at its landingpad, so we can't remove the landingpads until
  // all the invokes have been processed.
  for (Function::iterator BB = Func->begin(), E = Func->end(); BB != E; ++BB) {
    for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); Iter != E; ) {
      Instruction *Inst = Iter++;
      if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) {
        expandInvokeInst(Invoke);
      } else if (ResumeInst *Resume = dyn_cast<ResumeInst>(Inst)) {
        expandResumeInst(Resume);
      } else if (IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(Inst)) {
        if (Intrinsic->getIntrinsicID() == Intrinsic::eh_typeid_for) {
          Value *ExcType = Intrinsic->getArgOperand(0);
          Value *Val = ConstantInt::get(
              I32, ExcInfoWriter->getIDForExceptionType(ExcType));
          Intrinsic->replaceAllUsesWith(Val);
          Intrinsic->eraseFromParent();
        }
      }
    }
  }
  for (Function::iterator BB = Func->begin(), E = Func->end(); BB != E; ++BB) {
    for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); Iter != E; ) {
      Instruction *Inst = Iter++;
      if (LandingPadInst *LP = dyn_cast<LandingPadInst>(Inst)) {
        initializeFrame();
        Value *LPPtr = new BitCastInst(
            FrameJmpBuf, LP->getType()->getPointerTo(), "landingpad_ptr", LP);
        Value *LPVal = CopyDebug(new LoadInst(LPPtr, "", LP), LP);
        LPVal->takeName(LP);
        LP->replaceAllUsesWith(LPVal);
        LP->eraseFromParent();
      }
    }
  }
}
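
Both loops above depend on the same iteration-safety idiom: the iterator is advanced past the current instruction (Iter++) before the instruction may be erased, so erasure never invalidates the loop's position. A self-contained sketch of the pattern, using std::list as a stand-in for an instruction list:

#include <cstdio>
#include <list>

int main() {
  std::list<int> Insts = {1, 2, 3, 4, 5};
  for (auto Iter = Insts.begin(), E = Insts.end(); Iter != E;) {
    auto Cur = Iter++;  // step past Cur first, like `Inst = Iter++` above
    if (*Cur % 2 == 0)
      Insts.erase(Cur); // safe: Iter no longer refers to the erased node
  }
  for (int V : Insts)
    std::printf("%d ", V); // prints: 1 3 5
  std::printf("\n");
}
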
Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
    GEPOperator *GEP, int Depth) {
  Value *NewOperand =
      hoistAddrSpaceCastFrom(GEP->getPointerOperand(), Depth + 1);
  if (NewOperand == nullptr)
    return nullptr;

  // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr.
  assert(isEliminableAddrSpaceCast(NewOperand));
  Operator *Cast = cast<Operator>(NewOperand);

  SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
  Value *NewASC;
  if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) {
    // GEP = gep (addrspacecast X), indices
    // =>
    // NewGEP = gep X, indices
    // NewASC = addrspacecast NewGEP
    GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
        GEP->getSourceElementType(), Cast->getOperand(0), Indices,
        "", GEPI);
    NewGEP->setIsInBounds(GEP->isInBounds());
    NewASC = new AddrSpaceCastInst(NewGEP, GEP->getType(), "", GEPI);
    NewASC->takeName(GEP);
    // Without RAUWing GEP, the compiler would visit GEP again and emit
    // redundant instructions. This is exercised in test @rauw in
    // access-non-generic.ll.
    GEP->replaceAllUsesWith(NewASC);
  } else {
    // GEP is a constant expression.
    Constant *NewGEP = ConstantExpr::getGetElementPtr(
        GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)),
        Indices, GEP->isInBounds());
    NewASC = ConstantExpr::getAddrSpaceCast(NewGEP, GEP->getType());
  }
  return NewASC;
}
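
The rewrite is sound because indexing commutes with an address-preserving cast: gep (cast X), indices designates the same address as cast (gep X, indices), which is exactly what isEliminableAddrSpaceCast guarantees. A plain-pointer C++ analog of that commutation (standard C++ has no address spaces, so an ordinary type cast stands in):

#include <cassert>
#include <cstddef>

int main() {
  int Buf[8] = {};
  int *X = Buf;
  std::size_t I = 3;
  // Index-after-cast equals cast-after-index when the cast changes only
  // the pointer's type, not the address it designates.
  char *A = reinterpret_cast<char *>(X) + I * sizeof(int);
  char *B = reinterpret_cast<char *>(X + I);
  assert(A == B);
}
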
void StraightLineStrengthReduce::rewriteCandidateWithBasis(
    const Candidate &C, const Candidate &Basis) {
  // An instruction can correspond to multiple candidates. Therefore, instead of
  // simply deleting an instruction when we rewrite it, we mark its parent as
  // nullptr (i.e. unlink it) so that we can skip the candidates whose
  // instruction is already rewritten.
  if (!C.Ins->getParent())
    return;
  assert(C.Base == Basis.Base && C.Stride == Basis.Stride);
  // Basis = (B + i) * S
  // C     = (B + i') * S
  //   ==>
  // C     = Basis + (i' - i) * S
  IRBuilder<> Builder(C.Ins);
  ConstantInt *IndexOffset = ConstantInt::get(
      C.Ins->getContext(), C.Index->getValue() - Basis.Index->getValue());
  Value *Reduced;
  // TODO: preserve nsw/nuw in some cases.
  if (IndexOffset->isOne()) {
    // If (i' - i) is 1, fold C into Basis + S.
    Reduced = Builder.CreateAdd(Basis.Ins, C.Stride);
  } else if (IndexOffset->isMinusOne()) {
    // If (i' - i) is -1, fold C into Basis - S.
    Reduced = Builder.CreateSub(Basis.Ins, C.Stride);
  } else {
    Value *Bump = Builder.CreateMul(C.Stride, IndexOffset);
    Reduced = Builder.CreateAdd(Basis.Ins, Bump);
  }
  Reduced->takeName(C.Ins);
  C.Ins->replaceAllUsesWith(Reduced);
  C.Ins->dropAllReferences();
  // Unlink C.Ins so that we can skip other candidates also corresponding to
  // C.Ins. The actual deletion is postponed to the end of runOnFunction.
  C.Ins->removeFromParent();
  UnlinkedInstructions.insert(C.Ins);
}
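
The rewrite rests on a simple algebraic identity: given Basis = (B + i) * S and C = (B + i') * S, we have C = Basis + (i' - i) * S, replacing a multiply-and-add chain with a single bump off an existing value. A quick numeric check with arbitrary sample values:

#include <cassert>

int main() {
  long B = 7, S = 5, I = 2, IPrime = 9; // arbitrary sample values
  long Basis = (B + I) * S;
  long C = (B + IPrime) * S;
  // The strength-reduced form computed by rewriteCandidateWithBasis.
  long Reduced = Basis + (IPrime - I) * S;
  assert(C == Reduced);
}
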
Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast(
    BitCastOperator *BC, int Depth) {
  Value *NewOperand = hoistAddrSpaceCastFrom(BC->getOperand(0), Depth + 1);
  if (NewOperand == nullptr)
    return nullptr;

  // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr.
  assert(isEliminableAddrSpaceCast(NewOperand));
  Operator *Cast = cast<Operator>(NewOperand);

  // Cast  = addrspacecast Src
  // BC    = bitcast Cast
  //   =>
  // Cast' = bitcast Src
  // BC'   = addrspacecast Cast'
  Value *Src = Cast->getOperand(0);
  Type *TypeOfNewCast =
      PointerType::get(BC->getType()->getPointerElementType(),
                       Src->getType()->getPointerAddressSpace());
  Value *NewBC;
  if (BitCastInst *BCI = dyn_cast<BitCastInst>(BC)) {
    Value *NewCast = new BitCastInst(Src, TypeOfNewCast, "", BCI);
    NewBC = new AddrSpaceCastInst(NewCast, BC->getType(), "", BCI);
    NewBC->takeName(BC);
    // Without RAUWing BC, the compiler would visit BC again and emit
    // redundant instructions. This is exercised in test @rauw in
    // access-non-generic.ll.
    BC->replaceAllUsesWith(NewBC);
  } else {
    // BC is a constant expression.
    Constant *NewCast =
        ConstantExpr::getBitCast(cast<Constant>(Src), TypeOfNewCast);
    NewBC = ConstantExpr::getAddrSpaceCast(NewCast, BC->getType());
  }
  return NewBC;
}
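
As with the GEP case, the reorder is valid because both cast orders designate the same address when the addrspacecast is eliminable. With plain C++ pointers (no address spaces), the analogous claim is that two type-only casts commute:

#include <cassert>

int main() {
  int X = 0;
  int *P = &X;
  // bitcast-then-addrspacecast and addrspacecast-then-bitcast agree on
  // the address; here ordinary type casts model the type-only rewrites.
  void *A = static_cast<void *>(reinterpret_cast<float *>(P));
  void *B = reinterpret_cast<void *>(static_cast<void *>(P));
  assert(A == B);
}
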
Instruction *InstCombiner::visitMul(BinaryOperator &I) {
  bool Changed = SimplifyAssociativeOrCommutative(I);
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  if (Value *V = SimplifyMulInst(Op0, Op1, TD))
    return ReplaceInstUsesWith(I, V);

  if (Value *V = SimplifyUsingDistributiveLaws(I))
    return ReplaceInstUsesWith(I, V);

  if (match(Op1, m_AllOnes()))  // X * -1 == 0 - X
    return BinaryOperator::CreateNeg(Op0, I.getName());
  
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
    
    // ((X << C1)*C2) == (X * (C2 << C1))
    if (BinaryOperator *SI = dyn_cast<BinaryOperator>(Op0))
      if (SI->getOpcode() == Instruction::Shl)
        if (Constant *ShOp = dyn_cast<Constant>(SI->getOperand(1)))
          return BinaryOperator::CreateMul(SI->getOperand(0),
                                           ConstantExpr::getShl(CI, ShOp));
    
    const APInt &Val = CI->getValue();
    if (Val.isPowerOf2()) {          // Replace X*(2^C) with X << C
      Constant *NewCst = ConstantInt::get(Op0->getType(), Val.logBase2());
      BinaryOperator *Shl = BinaryOperator::CreateShl(Op0, NewCst);
      if (I.hasNoSignedWrap()) Shl->setHasNoSignedWrap();
      if (I.hasNoUnsignedWrap()) Shl->setHasNoUnsignedWrap();
      return Shl;
    }
    
    // Canonicalize (X+C1)*CI -> X*CI+C1*CI.
    { Value *X; ConstantInt *C1;
      if (Op0->hasOneUse() &&
          match(Op0, m_Add(m_Value(X), m_ConstantInt(C1)))) {
        Value *Add = Builder->CreateMul(X, CI);
        return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, CI));
      }
    }

    // (Y - X) * (-(2**n)) -> (X - Y) * (2**n), for positive nonzero n
    // (Y + const) * (-(2**n)) -> (-const - Y) * (2**n), for positive nonzero n
    // The "* (2**n)" thus becomes a potential shifting opportunity.
    {
      const APInt &   Val = CI->getValue();
      const APInt &PosVal = Val.abs();
      if (Val.isNegative() && PosVal.isPowerOf2()) {
        Value *X = 0, *Y = 0;
        if (Op0->hasOneUse()) {
          ConstantInt *C1;
          Value *Sub = 0;
          if (match(Op0, m_Sub(m_Value(Y), m_Value(X))))
            Sub = Builder->CreateSub(X, Y, "suba");
          else if (match(Op0, m_Add(m_Value(Y), m_ConstantInt(C1))))
            Sub = Builder->CreateSub(Builder->CreateNeg(C1), Y, "subc");
          if (Sub)
            return
              BinaryOperator::CreateMul(Sub,
                                        ConstantInt::get(Y->getType(), PosVal));
        }
      }
    }
  }
  
  // Simplify mul instructions with a constant RHS.
  if (isa<Constant>(Op1)) {    
    // Try to fold constant mul into select arguments.
    if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;

    if (isa<PHINode>(Op0))
      if (Instruction *NV = FoldOpIntoPhi(I))
        return NV;
  }

  if (Value *Op0v = dyn_castNegVal(Op0))     // -X * -Y = X*Y
    if (Value *Op1v = dyn_castNegVal(Op1))
      return BinaryOperator::CreateMul(Op0v, Op1v);

  // (X / Y) *  Y = X - (X % Y)
  // (X / Y) * -Y = (X % Y) - X
  {
    Value *Op1C = Op1;
    BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0);
    if (!BO ||
        (BO->getOpcode() != Instruction::UDiv && 
         BO->getOpcode() != Instruction::SDiv)) {
      Op1C = Op0;
      BO = dyn_cast<BinaryOperator>(Op1);
    }
    Value *Neg = dyn_castNegVal(Op1C);
    if (BO && BO->hasOneUse() &&
        (BO->getOperand(1) == Op1C || BO->getOperand(1) == Neg) &&
        (BO->getOpcode() == Instruction::UDiv ||
         BO->getOpcode() == Instruction::SDiv)) {
      Value *Op0BO = BO->getOperand(0), *Op1BO = BO->getOperand(1);

      // If the division is exact, X % Y is zero, so we end up with X or -X.
      if (PossiblyExactOperator *SDiv = dyn_cast<PossiblyExactOperator>(BO))
        if (SDiv->isExact()) {
          if (Op1BO == Op1C)
            return ReplaceInstUsesWith(I, Op0BO);
          return BinaryOperator::CreateNeg(Op0BO);
        }

      Value *Rem;
      if (BO->getOpcode() == Instruction::UDiv)
        Rem = Builder->CreateURem(Op0BO, Op1BO);
      else
        Rem = Builder->CreateSRem(Op0BO, Op1BO);
      Rem->takeName(BO);

      if (Op1BO == Op1C)
        return BinaryOperator::CreateSub(Op0BO, Rem);
      return BinaryOperator::CreateSub(Rem, Op0BO);
    }
  }

  // i1 mul -> i1 and.
  if (I.getType()->isIntegerTy(1))
    return BinaryOperator::CreateAnd(Op0, Op1);

  // X*(1 << Y) --> X << Y
  // (1 << Y)*X --> X << Y
  {
    Value *Y;
    if (match(Op0, m_Shl(m_One(), m_Value(Y))))
      return BinaryOperator::CreateShl(Op1, Y);
    if (match(Op1, m_Shl(m_One(), m_Value(Y))))
      return BinaryOperator::CreateShl(Op0, Y);
  }
  
  // If one of the operands of the multiply is a cast from a boolean value, then
  // we know the bool is either zero or one, so this is a 'masking' multiply.
  //   X * Y (where Y is 0 or 1) -> X & (0-Y)
  if (!I.getType()->isVectorTy()) {
    // -2 is "-1 << 1" so it is all bits set except the low one.
    APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
    
    Value *BoolCast = 0, *OtherOp = 0;
    if (MaskedValueIsZero(Op0, Negative2))
      BoolCast = Op0, OtherOp = Op1;
    else if (MaskedValueIsZero(Op1, Negative2))
      BoolCast = Op1, OtherOp = Op0;

    if (BoolCast) {
      Value *V = Builder->CreateSub(Constant::getNullValue(I.getType()),
                                    BoolCast);
      return BinaryOperator::CreateAnd(V, OtherOp);
    }
  }

  return Changed ? &I : 0;
}
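
Of the folds above, the power-of-two case is the classic strength reduction: in wrap-around (unsigned, modulo 2^n) arithmetic, X * 2^C and X << C agree on every input, which is why checking Val.isPowerOf2() is enough. A small sanity check:

#include <cassert>
#include <cstdint>

int main() {
  // X * (1 << C) == X << C holds bit-for-bit in modulo-2^32 arithmetic,
  // including when the product wraps.
  for (uint32_t X : {0u, 1u, 123456u, 0xdeadbeefu})
    for (unsigned C = 0; C < 32; ++C)
      assert(X * (uint32_t(1) << C) == (X << C));
}
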
Example #9
/// DoPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function.  At this point, we know that it's
/// safe to do so.
CallGraphNode *ArgPromotion::DoPromotion(Function *F,
                               SmallPtrSet<Argument*, 8> &ArgsToPromote,
                              SmallPtrSet<Argument*, 8> &ByValArgsToTransform) {

  // Start by computing a new prototype for the function, which is the same as
  // the old function, but has modified arguments.
  const FunctionType *FTy = F->getFunctionType();
  std::vector<const Type*> Params;

  typedef std::set<IndicesVector> ScalarizeTable;

  // ScalarizedElements - If we are promoting a pointer that has elements
  // accessed out of it, keep track of which elements are accessed so that we
  // can add one argument for each.
  //
  // Arguments that are directly loaded will have a zero element value here, to
  // handle cases where there are both a direct load and GEP accesses.
  //
  std::map<Argument*, ScalarizeTable> ScalarizedElements;

  // OriginalLoads - Keep track of a representative load instruction from the
  // original function so that we can tell the alias analysis implementation
  // what the new GEP/Load instructions we are inserting look like.
  std::map<IndicesVector, LoadInst*> OriginalLoads;

  // Attributes - Keep track of the parameter attributes for the arguments
  // that we are *not* promoting. For the ones that we do promote, the
  // parameter attributes are lost.
  SmallVector<AttributeWithIndex, 8> AttributesVec;
  const AttrListPtr &PAL = F->getAttributes();

  // Add any return attributes.
  if (Attributes attrs = PAL.getRetAttributes())
    AttributesVec.push_back(AttributeWithIndex::get(0, attrs));

  // First, determine the new argument list
  unsigned ArgIndex = 1;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
       ++I, ++ArgIndex) {
    if (ByValArgsToTransform.count(I)) {
      // Simple byval argument? Just add all the struct element types.
      const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
      const StructType *STy = cast<StructType>(AgTy);
      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
        Params.push_back(STy->getElementType(i));
      ++NumByValArgsPromoted;
    } else if (!ArgsToPromote.count(I)) {
      // Unchanged argument
      Params.push_back(I->getType());
      if (Attributes attrs = PAL.getParamAttributes(ArgIndex))
        AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs));
    } else if (I->use_empty()) {
      // Dead argument (these are always marked as promotable).
      ++NumArgumentsDead;
    } else {
      // Okay, this is being promoted. This means that the only uses are loads
      // or GEPs which are only used by loads.

      // In this table, we will track which indices are loaded from the argument
      // (where direct loads are tracked as no indices).
      ScalarizeTable &ArgIndices = ScalarizedElements[I];
      for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
           ++UI) {
        Instruction *User = cast<Instruction>(*UI);
        assert(isa<LoadInst>(User) || isa<GetElementPtrInst>(User));
        IndicesVector Indices;
        Indices.reserve(User->getNumOperands() - 1);
        // Since loads will only have a single operand, and GEPs only a single
        // non-index operand, this will record direct loads without any indices,
        // and gep+loads with the GEP indices.
        for (User::op_iterator II = User->op_begin() + 1, IE = User->op_end();
             II != IE; ++II)
          Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
        // GEPs with a single 0 index can be merged with direct loads
        if (Indices.size() == 1 && Indices.front() == 0)
          Indices.clear();
        ArgIndices.insert(Indices);
        LoadInst *OrigLoad;
        if (LoadInst *L = dyn_cast<LoadInst>(User))
          OrigLoad = L;
        else
          // Take any load; we will use it only to update alias analysis.
          OrigLoad = cast<LoadInst>(User->use_back());
        OriginalLoads[Indices] = OrigLoad;
      }

      // Add a parameter to the function for each element passed in.
      for (ScalarizeTable::iterator SI = ArgIndices.begin(),
             E = ArgIndices.end(); SI != E; ++SI) {
        // Not allowed to dereference ->begin() if size() is 0.
        Params.push_back(GetElementPtrInst::getIndexedType(I->getType(),
                                                           SI->begin(),
                                                           SI->end()));
        assert(Params.back());
      }

      if (ArgIndices.size() == 1 && ArgIndices.begin()->empty())
        ++NumArgumentsPromoted;
      else
        ++NumAggregatesPromoted;
    }
  }

  // Add any function attributes.
  if (Attributes attrs = PAL.getFnAttributes())
    AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));

  const Type *RetTy = FTy->getReturnType();

  // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which
  // have zero fixed arguments.
  bool ExtraArgHack = false;
  if (Params.empty() && FTy->isVarArg()) {
    ExtraArgHack = true;
    Params.push_back(Type::getInt32Ty(F->getContext()));
  }

  // Construct the new function type using the new arguments.
  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());

  // Create the new function body and insert it into the module.
  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
  NF->copyAttributesFrom(F);

  
  DEBUG(dbgs() << "ARG PROMOTION:  Promoting to:" << *NF << "\n"
        << "From: " << *F);
  
  // Recompute the parameter attributes list based on the new arguments for
  // the function.
  NF->setAttributes(AttrListPtr::get(AttributesVec.begin(),
                                     AttributesVec.end()));
  AttributesVec.clear();

  F->getParent()->getFunctionList().insert(F, NF);
  NF->takeName(F);

  // Get the alias analysis information that we need to update to reflect our
  // changes.
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();

  // Get the callgraph information that we need to update to reflect our
  // changes.
  CallGraph &CG = getAnalysis<CallGraph>();
  
  // Get a new callgraph node for NF.
  CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
  

  // Loop over all of the callers of the function, transforming the call sites
  // to pass in the loaded pointers.
  //
  SmallVector<Value*, 16> Args;
  while (!F->use_empty()) {
    CallSite CS = CallSite::get(F->use_back());
    assert(CS.getCalledFunction() == F);
    Instruction *Call = CS.getInstruction();
    const AttrListPtr &CallPAL = CS.getAttributes();

    // Add any return attributes.
    if (Attributes attrs = CallPAL.getRetAttributes())
      AttributesVec.push_back(AttributeWithIndex::get(0, attrs));

    // Loop over the operands, inserting GEP and loads in the caller as
    // appropriate.
    CallSite::arg_iterator AI = CS.arg_begin();
    ArgIndex = 1;
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
         I != E; ++I, ++AI, ++ArgIndex)
      if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
        Args.push_back(*AI);          // Unmodified argument

        if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
          AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));

      } else if (ByValArgsToTransform.count(I)) {
        // Emit a GEP and load for each element of the struct.
        const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
        const StructType *STy = cast<StructType>(AgTy);
        Value *Idxs[2] = {
              ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
          Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
          Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2,
                                                 (*AI)->getName()+"."+utostr(i),
                                                 Call);
          // TODO: Tell AA about the new values?
          Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
        }
      } else if (!I->use_empty()) {
        // Non-dead argument: insert GEPs and loads as appropriate.
        ScalarizeTable &ArgIndices = ScalarizedElements[I];
        // Store the Value* version of the indices in here, but declare it now
        // for reuse.
        std::vector<Value*> Ops;
        for (ScalarizeTable::iterator SI = ArgIndices.begin(),
               E = ArgIndices.end(); SI != E; ++SI) {
          Value *V = *AI;
          LoadInst *OrigLoad = OriginalLoads[*SI];
          if (!SI->empty()) {
            Ops.reserve(SI->size());
            const Type *ElTy = V->getType();
            for (IndicesVector::const_iterator II = SI->begin(),
                 IE = SI->end(); II != IE; ++II) {
              // Use i32 to index structs, and i64 for others (pointers/arrays).
              // This satisfies GEP constraints.
              const Type *IdxTy = (ElTy->isStructTy() ?
                    Type::getInt32Ty(F->getContext()) : 
                    Type::getInt64Ty(F->getContext()));
              Ops.push_back(ConstantInt::get(IdxTy, *II));
              // Keep track of the type we're currently indexing.
              ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
            }
            // And create a GEP to extract those indices.
            V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(),
                                          V->getName()+".idx", Call);
            Ops.clear();
            AA.copyValue(OrigLoad->getOperand(0), V);
          }
          // Since we're replacing a load make sure we take the alignment
          // of the previous load.
          LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
          newLoad->setAlignment(OrigLoad->getAlignment());
          Args.push_back(newLoad);
          AA.copyValue(OrigLoad, Args.back());
        }
      }

    if (ExtraArgHack)
      Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext())));

    // Push any varargs arguments on the list.
    for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
      Args.push_back(*AI);
      if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
        AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
    }

    // Add any function attributes.
    if (Attributes attrs = CallPAL.getFnAttributes())
      AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));

    Instruction *New;
    if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
      New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                               Args.begin(), Args.end(), "", Call);
      cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
      cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
                                                          AttributesVec.end()));
    } else {
      New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
      cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
      cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
                                                        AttributesVec.end()));
      if (cast<CallInst>(Call)->isTailCall())
        cast<CallInst>(New)->setTailCall();
    }
    Args.clear();
    AttributesVec.clear();

    // Update the alias analysis implementation to know that we are replacing
    // the old call with a new one.
    AA.replaceWithNewValue(Call, New);

    // Update the callgraph to know that the callsite has been transformed.
    CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
    CalleeNode->replaceCallEdge(Call, New, NF_CGN);

    if (!Call->use_empty()) {
      Call->replaceAllUsesWith(New);
      New->takeName(Call);
    }

    // Finally, remove the old call from the program, reducing the use-count of
    // F.
    Call->eraseFromParent();
  }

  // Since we have now created the new function, splice the body of the old
  // function right into the new function, leaving the old rotting hulk of the
  // function empty.
  NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());

  // Loop over the argument list, transferring uses of the old arguments over
  // to the new arguments, also transferring over the names as well.
  //
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
       I2 = NF->arg_begin(); I != E; ++I) {
    if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
      // If this is an unmodified argument, move the name and users over to the
      // new version.
      I->replaceAllUsesWith(I2);
      I2->takeName(I);
      AA.replaceWithNewValue(I, I2);
      ++I2;
      continue;
    }

    if (ByValArgsToTransform.count(I)) {
      // In the callee, we create an alloca, and store each of the new incoming
      // arguments into the alloca.
      Instruction *InsertPt = NF->begin()->begin();

      // Just add all the struct element types.
      const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
      Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt);
      const StructType *STy = cast<StructType>(AgTy);
      Value *Idxs[2] = {
            ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };

      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
        Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
        Value *Idx = 
          GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2,
                                    TheAlloca->getName()+"."+Twine(i), 
                                    InsertPt);
        I2->setName(I->getName()+"."+Twine(i));
        new StoreInst(I2++, Idx, InsertPt);
      }

      // Anything that used the arg should now use the alloca.
      I->replaceAllUsesWith(TheAlloca);
      TheAlloca->takeName(I);
      AA.replaceWithNewValue(I, TheAlloca);
      continue;
    }

    if (I->use_empty()) {
      AA.deleteValue(I);
      continue;
    }

    // Otherwise, if we promoted this argument, then all users are load
    // instructions (or GEPs with only load users), and all loads should be
    // using the new argument that we added.
    ScalarizeTable &ArgIndices = ScalarizedElements[I];

    while (!I->use_empty()) {
      if (LoadInst *LI = dyn_cast<LoadInst>(I->use_back())) {
        assert(ArgIndices.begin()->empty() &&
               "Load element should sort to front!");
        I2->setName(I->getName()+".val");
        LI->replaceAllUsesWith(I2);
        AA.replaceWithNewValue(LI, I2);
        LI->eraseFromParent();
        DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
              << "' in function '" << F->getName() << "'\n");
      } else {
        GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back());
        IndicesVector Operands;
        Operands.reserve(GEP->getNumIndices());
        for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
             II != IE; ++II)
          Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());

        // GEPs with a single 0 index can be merged with direct loads
        if (Operands.size() == 1 && Operands.front() == 0)
          Operands.clear();

        Function::arg_iterator TheArg = I2;
        for (ScalarizeTable::iterator It = ArgIndices.begin();
             *It != Operands; ++It, ++TheArg) {
          assert(It != ArgIndices.end() && "GEP not handled??");
        }

        std::string NewName = I->getName();
        for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
            NewName += "." + utostr(Operands[i]);
        }
        NewName += ".val";
        TheArg->setName(NewName);

        DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
              << "' of function '" << NF->getName() << "'\n");

        // All of the uses must be load instructions.  Replace them all with
        // the argument specified by ArgNo.
        while (!GEP->use_empty()) {
          LoadInst *L = cast<LoadInst>(GEP->use_back());
          L->replaceAllUsesWith(TheArg);
          AA.replaceWithNewValue(L, TheArg);
          L->eraseFromParent();
        }
        AA.deleteValue(GEP);
        GEP->eraseFromParent();
      }
    }

    // Increment I2 past all of the arguments added for this promoted pointer.
    for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i)
      ++I2;
  }

  // Notify the alias analysis implementation that we inserted a new argument.
  if (ExtraArgHack)
    AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())), 
                 NF->arg_begin());


  // Tell the alias analysis that the old function is about to disappear.
  AA.replaceWithNewValue(F, NF);

  
  NF_CGN->stealCalledFunctionsFrom(CG[F]);
  
  // Now that the old function is dead, delete it.  If there is a dangling
  // reference to the CallgraphNode, just leave the dead function around for
  // someone else to nuke.
  CallGraphNode *CGN = CG[F];
  if (CGN->getNumReferences() == 0)
    delete CG.removeFunctionFromModule(CGN);
  else
    F->setLinkage(Function::ExternalLinkage);
  
  return NF_CGN;
}
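
Seen from the source level, the transformation this function performs is: a pointer argument whose only uses are loads (or GEPs feeding loads) becomes one scalar argument per loaded element, and each call site performs the loads instead. A hedged C-level before/after analog with a hypothetical function f:

#include <cstdio>

// Before promotion: the callee loads through the pointer argument.
static int fBefore(const int *P) { return *P + 1; }

// After promotion: the caller does the load and passes the value, which
// is what the GEP/load insertion at each call site above achieves.
static int fAfter(int PVal) { return PVal + 1; }

int main() {
  int X = 41;
  std::printf("%d %d\n", fBefore(&X), fAfter(X)); // prints: 42 42
}
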
Example #10
void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<WeakVH, 16> DeadInsts;

  // Rewrite all induction variable expressions in terms of the canonical
  // induction variable.
  //
  // If there were induction variables of other sizes or offsets, manually
  // add the offsets to the primary induction variable and cast, avoiding
  // the need for the code evaluation methods to insert induction variables
  // of different sizes.
  for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
    Value *Op = UI->getOperandValToReplace();
    const Type *UseTy = Op->getType();
    Instruction *User = UI->getUser();

    // Compute the final addrec to expand into code.
    const SCEV *AR = IU->getReplacementExpr(*UI);

    // Evaluate the expression out of the loop, if possible.
    if (!L->contains(UI->getUser())) {
      const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
      if (ExitVal->isLoopInvariant(L))
        AR = ExitVal;
    }

    // FIXME: It is an extremely bad idea to indvar substitute anything more
    // complex than affine induction variables.  Doing so will put expensive
    // polynomial evaluations inside of the loop, and the strength reduction
    // pass currently can only reduce affine polynomials.  For now just
    // disable indvar subst on anything more complex than an affine addrec,
    // unless it can be expanded to a trivial value.
    if (!isSafe(AR, L))
      continue;

    // Determine the insertion point for this user. By default, insert
    // immediately before the user. The SCEVExpander class will automatically
    // hoist loop invariants out of the loop. For PHI nodes, there may be
    // multiple uses, so compute the nearest common dominator for the
    // incoming blocks.
    Instruction *InsertPt = User;
    if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
      for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
        if (PHI->getIncomingValue(i) == Op) {
          if (InsertPt == User)
            InsertPt = PHI->getIncomingBlock(i)->getTerminator();
          else
            InsertPt =
              DT->findNearestCommonDominator(InsertPt->getParent(),
                                             PHI->getIncomingBlock(i))
                    ->getTerminator();
        }

    // Now expand it into actual Instructions and patch it into place.
    Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);

    // Inform ScalarEvolution that this value is changing. The change doesn't
    // affect its value, but it does potentially affect which use lists the
    // value will be on after the replacement, which affects ScalarEvolution's
    // ability to walk use lists and drop dangling pointers when a value is
    // deleted.
    SE->forgetValue(User);

    // Patch the new value into place.
    if (Op->hasName())
      NewVal->takeName(Op);
    User->replaceUsesOfWith(Op, NewVal);
    UI->setOperandValToReplace(NewVal);
    DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
                 << "   into = " << *NewVal << "\n");
    ++NumRemoved;
    Changed = true;

    // The old value may be dead now.
    DeadInsts.push_back(Op);
  }

  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();
  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
          dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
      RecursivelyDeleteTriviallyDeadInstructions(Inst);
}
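
The effect of the rewrite is to express every derived induction variable as an affine function of the canonical one, so only a single IV needs to be maintained. A loop-level analog (hypothetical derived IV j = 4*i + 10, i.e. the addrec {10,+,4}):

#include <cassert>

int main() {
  int J = 10; // a derived IV maintained by its own increment...
  for (int I = 0; I < 8; ++I) {
    // ...equals the affine expression of the canonical IV that
    // expandCodeFor would emit for the addrec {10,+,4}.
    assert(J == 4 * I + 10);
    J += 4;
  }
}
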
void StraightLineStrengthReduce::rewriteCandidateWithBasis(
    const Candidate &C, const Candidate &Basis) {
  assert(C.CandidateKind == Basis.CandidateKind && C.Base == Basis.Base &&
         C.Stride == Basis.Stride);
  // We run rewriteCandidateWithBasis on all candidates in a post-order, so the
  // basis of a candidate cannot be unlinked before the candidate.
  assert(Basis.Ins->getParent() != nullptr && "the basis is unlinked");

  // An instruction can correspond to multiple candidates. Therefore, instead of
  // simply deleting an instruction when we rewrite it, we mark its parent as
  // nullptr (i.e. unlink it) so that we can skip the candidates whose
  // instruction is already rewritten.
  if (!C.Ins->getParent())
    return;

  IRBuilder<> Builder(C.Ins);
  bool BumpWithUglyGEP;
  Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP);
  Value *Reduced = nullptr; // equivalent to but weaker than C.Ins
  switch (C.CandidateKind) {
  case Candidate::Add:
  case Candidate::Mul:
    // C = Basis + Bump
    if (BinaryOperator::isNeg(Bump)) {
      // If Bump is a neg instruction, emit C = Basis - (-Bump).
      Reduced =
          Builder.CreateSub(Basis.Ins, BinaryOperator::getNegArgument(Bump));
      // We only use the negative argument of Bump, and Bump itself may be
      // trivially dead.
      RecursivelyDeleteTriviallyDeadInstructions(Bump);
    } else {
      // It's tempting to preserve nsw on Bump and/or Reduced. However, it's
      // usually unsound, e.g.,
      //
      // X = (-2 +nsw 1) *nsw INT_MAX
      // Y = (-2 +nsw 3) *nsw INT_MAX
      //   =>
      // Y = X + 2 * INT_MAX
      //
      // Neither the + nor the * in the resulting expression is nsw.
      Reduced = Builder.CreateAdd(Basis.Ins, Bump);
    }
    break;
  case Candidate::GEP:
    {
      Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType());
      bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds();
      if (BumpWithUglyGEP) {
        // C = (char *)Basis + Bump
        unsigned AS = Basis.Ins->getType()->getPointerAddressSpace();
        Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS);
        Reduced = Builder.CreateBitCast(Basis.Ins, CharTy);
        if (InBounds)
          Reduced =
              Builder.CreateInBoundsGEP(Builder.getInt8Ty(), Reduced, Bump);
        else
          Reduced = Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump);
        Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType());
      } else {
        // C = gep Basis, Bump
        // Canonicalize bump to pointer size.
        Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy);
        if (InBounds)
          Reduced = Builder.CreateInBoundsGEP(nullptr, Basis.Ins, Bump);
        else
          Reduced = Builder.CreateGEP(nullptr, Basis.Ins, Bump);
      }
    }
    break;
  default:
    llvm_unreachable("C.CandidateKind is invalid");
  }
  Reduced->takeName(C.Ins);
  C.Ins->replaceAllUsesWith(Reduced);
  // Unlink C.Ins so that we can skip other candidates also corresponding to
  // C.Ins. The actual deletion is postponed to the end of runOnFunction.
  C.Ins->removeFromParent();
  UnlinkedInstructions.push_back(C.Ins);
}
void InitializeSoftBound::constructCheckHandlers(Module &module) {

  Type* void_ty = Type::getVoidTy(module.getContext());

  Type* void_ptr_ty = PointerType::getUnqual(Type::getInt8Ty(module.getContext()));
  Type* size_ty = Type::getInt64Ty(module.getContext());

  module.getOrInsertFunction("__softboundcets_spatial_load_dereference_check",
                             void_ty, void_ptr_ty, void_ptr_ty, 
                             void_ptr_ty, size_ty, NULL);

  module.getOrInsertFunction("__softboundcets_spatial_store_dereference_check", 
                             void_ty, void_ptr_ty, void_ptr_ty, 
                             void_ptr_ty, size_ty, NULL);

  module.getOrInsertFunction("__softboundcets_temporal_load_dereference_check", 
                             void_ty, void_ptr_ty, size_ty, 
                             void_ptr_ty, void_ptr_ty, NULL);

  module.getOrInsertFunction("__softboundcets_temporal_store_dereference_check", 
                             void_ty, void_ptr_ty, size_ty, 
                             void_ptr_ty, void_ptr_ty, NULL);


  Function* global_init = (Function *) module.getOrInsertFunction("__softboundcets_global_init", 
                                                                  void_ty, NULL);

  global_init->setDoesNotThrow();
  global_init->setLinkage(GlobalValue::InternalLinkage);

  BasicBlock* BB = BasicBlock::Create(module.getContext(), 
                                      "entry", global_init);
  
  Function* softboundcets_init = (Function*) module.getOrInsertFunction(
      "__softboundcets_init", void_ty,
      Type::getInt32Ty(module.getContext()), NULL);

  
  SmallVector<Value*, 8> args;
  Constant * const_one = ConstantInt::get(Type::getInt32Ty(module.getContext()), 1);
  
  args.push_back(const_one);
  Instruction* ret = ReturnInst::Create(module.getContext(), BB);
  
  CallInst::Create(softboundcets_init, args, "", ret);

  Type * Int32Type = IntegerType::getInt32Ty(module.getContext());
  std::vector<Constant *> CtorInits;
  CtorInits.push_back(ConstantInt::get(Int32Type, 0));
  CtorInits.push_back(global_init);
  StructType * ST = ConstantStruct::getTypeForElements(CtorInits, false);
  Constant * RuntimeCtorInit = ConstantStruct::get(ST, CtorInits);

  //
  // Get the current set of static global constructors and add the new ctor
  // to the list.
  //
  std::vector<Constant *> CurrentCtors;
  GlobalVariable * GVCtor = module.getNamedGlobal ("llvm.global_ctors");
  if (GVCtor) {
    if (Constant * C = GVCtor->getInitializer()) {
      for (unsigned index = 0; index < C->getNumOperands(); ++index) {
        CurrentCtors.push_back (dyn_cast<Constant>(C->getOperand (index)));
      }
    }
  }
  CurrentCtors.push_back(RuntimeCtorInit);

  //
  // Create a new initializer.
  //
  ArrayType * AT = ArrayType::get (RuntimeCtorInit->getType(),
                                   CurrentCtors.size());
  Constant * NewInit = ConstantArray::get (AT, CurrentCtors);

  //
  // Create the new llvm.global_ctors global variable and remove the old one
  // if it existed.
  //
  Value * newGVCtor = new GlobalVariable (module,
                                          NewInit->getType(),
                                          false,
                                          GlobalValue::AppendingLinkage,
                                          NewInit,
                                          "llvm.global_ctors");
  if (GVCtor) {
    newGVCtor->takeName (GVCtor);
    GVCtor->eraseFromParent ();
  }
}
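
For context, each llvm.global_ctors entry built above is a (priority, function) pair; the runtime runs every listed function before main, which is how __softboundcets_global_init gets control early. A plain-C++ sketch of that convention (names illustrative, not the SoftBound runtime's):

#include <cstdio>

// Analog of one llvm.global_ctors element: { i32 priority, void ()* ctor }.
struct CtorEntry {
  int Priority;
  void (*Ctor)();
};

static void globalInit() { std::printf("runtime initialized\n"); }

// The pass appends its entry to whatever ctors the module already had.
static CtorEntry GlobalCtors[] = {{0, &globalInit}};

int main() {
  for (const CtorEntry &E : GlobalCtors)
    E.Ctor(); // the loader runs these before main in the real scheme
}
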
/// FoldSelectIntoOp - Try to fold the select into one of the operands to
/// facilitate further optimization.
Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
                                            Value *FalseVal) {
  // See the comment above GetSelectFoldableOperands for a description of the
  // transformation we are doing here.
  if (Instruction *TVI = dyn_cast<Instruction>(TrueVal)) {
    if (TVI->hasOneUse() && TVI->getNumOperands() == 2 &&
        !isa<Constant>(FalseVal)) {
      if (unsigned SFO = GetSelectFoldableOperands(TVI)) {
        unsigned OpToFold = 0;
        if ((SFO & 1) && FalseVal == TVI->getOperand(0)) {
          OpToFold = 1;
        } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) {
          OpToFold = 2;
        }

        if (OpToFold) {
          Constant *C = GetSelectFoldableConstant(TVI);
          Value *OOp = TVI->getOperand(2-OpToFold);
          // Avoid creating a select between two constants unless it
          // selects between 0, 1, and -1.
          if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
            Value *NewSel = Builder->CreateSelect(SI.getCondition(), OOp, C);
            NewSel->takeName(TVI);
            BinaryOperator *TVI_BO = cast<BinaryOperator>(TVI);
            BinaryOperator *BO = BinaryOperator::Create(TVI_BO->getOpcode(),
                                                        FalseVal, NewSel);
            if (isa<PossiblyExactOperator>(BO))
              BO->setIsExact(TVI_BO->isExact());
            if (isa<OverflowingBinaryOperator>(BO)) {
              BO->setHasNoUnsignedWrap(TVI_BO->hasNoUnsignedWrap());
              BO->setHasNoSignedWrap(TVI_BO->hasNoSignedWrap());
            }
            return BO;
          }
        }
      }
    }
  }

  if (Instruction *FVI = dyn_cast<Instruction>(FalseVal)) {
    if (FVI->hasOneUse() && FVI->getNumOperands() == 2 &&
        !isa<Constant>(TrueVal)) {
      if (unsigned SFO = GetSelectFoldableOperands(FVI)) {
        unsigned OpToFold = 0;
        if ((SFO & 1) && TrueVal == FVI->getOperand(0)) {
          OpToFold = 1;
        } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) {
          OpToFold = 2;
        }

        if (OpToFold) {
          Constant *C = GetSelectFoldableConstant(FVI);
          Value *OOp = FVI->getOperand(2-OpToFold);
          // Avoid creating a select between two constants unless it
          // selects between 0, 1, and -1.
          if (!isa<Constant>(OOp) || isSelect01(C, cast<Constant>(OOp))) {
            Value *NewSel = Builder->CreateSelect(SI.getCondition(), C, OOp);
            NewSel->takeName(FVI);
            BinaryOperator *FVI_BO = cast<BinaryOperator>(FVI);
            BinaryOperator *BO = BinaryOperator::Create(FVI_BO->getOpcode(),
                                                        TrueVal, NewSel);
            if (isa<PossiblyExactOperator>(BO))
              BO->setIsExact(FVI_BO->isExact());
            if (isa<OverflowingBinaryOperator>(BO)) {
              BO->setHasNoUnsignedWrap(FVI_BO->hasNoUnsignedWrap());
              BO->setHasNoSignedWrap(FVI_BO->hasNoSignedWrap());
            }
            return BO;
          }
        }
      }
    }
  }

  return nullptr;
}
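
Concretely, the fold turns select(cond, x op c, x) into x op select(cond, c, identity), so the binary operator executes unconditionally and the select chooses between two constants. A quick check of the add case, with ternaries standing in for selects:

#include <cassert>

int main() {
  for (bool Cond : {false, true})
    for (int X : {-3, 0, 7}) {
      // select(cond, X+4, X)  ==  X + select(cond, 4, 0)
      int Before = Cond ? X + 4 : X;
      int After = X + (Cond ? 4 : 0);
      assert(Before == After);
    }
}
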
Example #14
/// runOnFunction - Insert code to maintain the shadow stack.
bool ShadowStackGCLowering::runOnFunction(Function &F) {
  // Quick exit for functions that do not use the shadow stack GC.
  if (!F.hasGC() ||
      F.getGC() != std::string("shadow-stack"))
    return false;
  
  LLVMContext &Context = F.getContext();

  // Find calls to llvm.gcroot.
  CollectRoots(F);

  // If there are no roots in this function, then there is no need to add a
  // stack map entry for it.
  if (Roots.empty())
    return false;

  // Build the constant map and figure the type of the shadow stack entry.
  Value *FrameMap = GetFrameMap(F);
  Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);

  // Build the shadow stack entry at the very start of the function.
  BasicBlock::iterator IP = F.getEntryBlock().begin();
  IRBuilder<> AtEntry(IP->getParent(), IP);

  Instruction *StackEntry =
      AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr, "gc_frame");

  while (isa<AllocaInst>(IP))
    ++IP;
  AtEntry.SetInsertPoint(IP->getParent(), IP);

  // Initialize the map pointer and load the current head of the shadow stack.
  Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
  Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
                                       StackEntry, 0, 1, "gc_frame.map");
  AtEntry.CreateStore(FrameMap, EntryMapPtr);

  // After all the allocas...
  for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
    // For each root, find the corresponding slot in the aggregate...
    Value *SlotPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
                               StackEntry, 1 + I, "gc_root");

    // And use it in lieu of the alloca.
    AllocaInst *OriginalAlloca = Roots[I].second;
    SlotPtr->takeName(OriginalAlloca);
    OriginalAlloca->replaceAllUsesWith(SlotPtr);
  }

  // Move past the original stores inserted by GCStrategy::InitRoots. This isn't
  // really necessary (the collector would never see the intermediate state at
  // runtime), but it's nicer not to push the half-initialized entry onto the
  // shadow stack.
  while (isa<StoreInst>(IP))
    ++IP;
  AtEntry.SetInsertPoint(IP->getParent(), IP);

  // Push the entry onto the shadow stack.
  Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
                                        StackEntry, 0, 0, "gc_frame.next");
  Instruction *NewHeadVal = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
                                      StackEntry, 0, "gc_newhead");
  AtEntry.CreateStore(CurrentHead, EntryNextPtr);
  AtEntry.CreateStore(NewHeadVal, Head);

  // For each instruction that escapes...
  EscapeEnumerator EE(F, "gc_cleanup");
  while (IRBuilder<> *AtExit = EE.Next()) {
    // Pop the entry from the shadow stack. Don't reuse CurrentHead from
    // AtEntry, since that would make the value live for the entire function.
    Instruction *EntryNextPtr2 =
        CreateGEP(Context, *AtExit, ConcreteStackEntryTy, StackEntry, 0, 0,
                  "gc_frame.next");
    Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
    AtExit->CreateStore(SavedHead, Head);
  }

  // Delete the original allocas (which are no longer used) and the intrinsic
  // calls (which are no longer valid). Doing this last avoids invalidating
  // iterators.
  for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
    Roots[I].first->eraseFromParent();
    Roots[I].second->eraseFromParent();
  }

  Roots.clear();
  return true;
}
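
The code this pass emits maintains a singly linked list of frame entries: each function pushes an entry describing its roots onto a global head pointer and pops it on every exit path, so a collector can walk the list to find live roots. A minimal C++ sketch of that runtime structure (types hypothetical):

#include <cstdio>

struct StackEntry {
  StackEntry *Next; // gc_frame.next
  void *Roots[1];   // one slot per gcroot in this frame
};

static StackEntry *Head; // the global loaded/stored as Head above

static void withFrame(void *Root) {
  StackEntry Entry;  // the alloca'd gc_frame
  Entry.Next = Head; // save the current head (gc_currhead)
  Entry.Roots[0] = Root;
  Head = &Entry;     // push (store gc_newhead)
  // ...function body: a collector can now walk Head to find Root...
  Head = Entry.Next; // pop on every exit path (gc_savedhead)
}

int main() {
  int X = 0;
  withFrame(&X);
  std::printf("frames after return: %s\n", Head ? "nonempty" : "empty");
}
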
Example #15
Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
  bool Changed = SimplifyAssociativeOrCommutative(I);
  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);

  if (Value *V = SimplifyVectorOp(I))
    return ReplaceInstUsesWith(I, V);

  if (isa<Constant>(Op0))
    std::swap(Op0, Op1);

  if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), DL))
    return ReplaceInstUsesWith(I, V);

  bool AllowReassociate = I.hasUnsafeAlgebra();

  // Simplify mul instructions with a constant RHS.
  if (isa<Constant>(Op1)) {
    // Try to fold constant mul into select arguments.
    if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
      if (Instruction *R = FoldOpIntoSelect(I, SI))
        return R;

    if (isa<PHINode>(Op0))
      if (Instruction *NV = FoldOpIntoPhi(I))
        return NV;

    // (fmul X, -1.0) --> (fsub -0.0, X)
    if (match(Op1, m_SpecificFP(-1.0))) {
      Constant *NegZero = ConstantFP::getNegativeZero(Op1->getType());
      Instruction *RI = BinaryOperator::CreateFSub(NegZero, Op0);
      RI->copyFastMathFlags(&I);
      return RI;
    }

    Constant *C = cast<Constant>(Op1);
    if (AllowReassociate && isFiniteNonZeroFp(C)) {
      // Let MDC denote an expression in one of these forms:
      // X * C, C/X, X/C, where C is a constant.
      //
      // Try to simplify "MDC * Constant"
      if (isFMulOrFDivWithConstant(Op0))
        if (Value *V = foldFMulConst(cast<Instruction>(Op0), C, &I))
          return ReplaceInstUsesWith(I, V);

      // (MDC +/- C1) * C => (MDC * C) +/- (C1 * C)
      Instruction *FAddSub = dyn_cast<Instruction>(Op0);
      if (FAddSub &&
          (FAddSub->getOpcode() == Instruction::FAdd ||
           FAddSub->getOpcode() == Instruction::FSub)) {
        Value *Opnd0 = FAddSub->getOperand(0);
        Value *Opnd1 = FAddSub->getOperand(1);
        Constant *C0 = dyn_cast<Constant>(Opnd0);
        Constant *C1 = dyn_cast<Constant>(Opnd1);
        bool Swap = false;
        if (C0) {
          std::swap(C0, C1);
          std::swap(Opnd0, Opnd1);
          Swap = true;
        }

        if (C1 && isFiniteNonZeroFp(C1) && isFMulOrFDivWithConstant(Opnd0)) {
          Value *M1 = ConstantExpr::getFMul(C1, C);
          Value *M0 = isNormalFp(cast<Constant>(M1)) ?
                      foldFMulConst(cast<Instruction>(Opnd0), C, &I) :
                      nullptr;
          if (M0 && M1) {
            if (Swap && FAddSub->getOpcode() == Instruction::FSub)
              std::swap(M0, M1);

            Instruction *RI = (FAddSub->getOpcode() == Instruction::FAdd)
                                  ? BinaryOperator::CreateFAdd(M0, M1)
                                  : BinaryOperator::CreateFSub(M0, M1);
            RI->copyFastMathFlags(&I);
            return RI;
          }
        }
      }
    }
  }


  // Under unsafe algebra, do:
  // X * log2(0.5*Y) = X*log2(Y) - X
  if (I.hasUnsafeAlgebra()) {
    Value *OpX = nullptr;
    Value *OpY = nullptr;
    IntrinsicInst *Log2;
    detectLog2OfHalf(Op0, OpY, Log2);
    if (OpY) {
      OpX = Op1;
    } else {
      detectLog2OfHalf(Op1, OpY, Log2);
      if (OpY) {
        OpX = Op0;
      }
    }
    // If the pattern is detected, emit the alternate sequence.
    if (OpX && OpY) {
      BuilderTy::FastMathFlagGuard Guard(*Builder);
      Builder->SetFastMathFlags(Log2->getFastMathFlags());
      Log2->setArgOperand(0, OpY);
      Value *FMulVal = Builder->CreateFMul(OpX, Log2);
      Value *FSub = Builder->CreateFSub(FMulVal, OpX);
      FSub->takeName(&I);
      return ReplaceInstUsesWith(I, FSub);
    }
  }

  // Handle the symmetric situation in a 2-iteration loop.
  Value *Opnd0 = Op0;
  Value *Opnd1 = Op1;
  for (int i = 0; i < 2; i++) {
    bool IgnoreZeroSign = I.hasNoSignedZeros();
    if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) {
      BuilderTy::FastMathFlagGuard Guard(*Builder);
      Builder->SetFastMathFlags(I.getFastMathFlags());

      Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign);
      Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign);

      // -X * -Y => X*Y
      if (N1) {
        Value *FMul = Builder->CreateFMul(N0, N1);
        FMul->takeName(&I);
        return ReplaceInstUsesWith(I, FMul);
      }

      if (Opnd0->hasOneUse()) {
        // -X * Y => -(X*Y) (Promote negation as high as possible)
        Value *T = Builder->CreateFMul(N0, Opnd1);
        Value *Neg = Builder->CreateFNeg(T);
        Neg->takeName(&I);
        return ReplaceInstUsesWith(I, Neg);
      }
    }

    // (X*Y) * X => (X*X) * Y where Y != X
    //  The purpose is two-fold:
    //   1) to form a power expression (of X).
    //   2) potentially shorten the critical path: After transformation, the
    //  latency of the instruction Y is amortized by the expression of X*X,
    //  and therefore Y is in a "less critical" position compared to what it
    //  was before the transformation.
    //
    if (AllowReassociate) {
      Value *Opnd0_0, *Opnd0_1;
      if (Opnd0->hasOneUse() &&
          match(Opnd0, m_FMul(m_Value(Opnd0_0), m_Value(Opnd0_1)))) {
        Value *Y = nullptr;
        if (Opnd0_0 == Opnd1 && Opnd0_1 != Opnd1)
          Y = Opnd0_1;
        else if (Opnd0_1 == Opnd1 && Opnd0_0 != Opnd1)
          Y = Opnd0_0;

        if (Y) {
          BuilderTy::FastMathFlagGuard Guard(*Builder);
          Builder->SetFastMathFlags(I.getFastMathFlags());
          Value *T = Builder->CreateFMul(Opnd1, Opnd1);

          Value *R = Builder->CreateFMul(T, Y);
          R->takeName(&I);
          return ReplaceInstUsesWith(I, R);
        }
      }
    }

    if (!isa<Constant>(Op1))
      std::swap(Opnd0, Opnd1);
    else
      break;
  }

  return Changed ? &I : nullptr;
}
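
The log2 rewrite above relies on log2(0.5*Y) == log2(Y) - 1, hence X * log2(0.5*Y) == X*log2(Y) - X; it is only valid under unsafe algebra because the regrouping changes rounding. A rough numeric check with doubles:

#include <cassert>
#include <cmath>

int main() {
  double X = 3.25, Y = 20.0;
  double Before = X * std::log2(0.5 * Y);
  double After = X * std::log2(Y) - X;
  // Equal up to floating-point rounding, which is exactly why the
  // transformation requires fast-math.
  assert(std::fabs(Before - After) < 1e-9);
}
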
// Creates the helper function that will do the setjmp() call and
// function call for implementing Invoke.  Creates the call to the
// helper function.  Returns a Value which is zero on the normal
// execution path and non-zero if the landingpad block should be
// entered.
Value *FuncRewriter::createSetjmpWrappedCall(InvokeInst *Invoke) {
  Type *I32 = Type::getInt32Ty(Func->getContext());

  // Allocate space for storing the invoke's result temporarily (so
  // that the helper function can return multiple values).  We don't
  // need to do this if the result is unused, and we can't if its type
  // is void.
  Instruction *ResultAlloca = NULL;
  if (!Invoke->use_empty()) {
    ResultAlloca = new AllocaInst(Invoke->getType(), "invoke_result_ptr");
    Func->getEntryBlock().getInstList().push_front(ResultAlloca);
  }

  // Create type for the helper function.
  SmallVector<Type *, 10> ArgTypes;
  for (unsigned I = 0, E = Invoke->getNumArgOperands(); I < E; ++I)
    ArgTypes.push_back(Invoke->getArgOperand(I)->getType());
  ArgTypes.push_back(Invoke->getCalledValue()->getType());
  ArgTypes.push_back(FrameJmpBuf->getType());
  if (ResultAlloca)
    ArgTypes.push_back(Invoke->getType()->getPointerTo());
  FunctionType *FTy = FunctionType::get(I32, ArgTypes, false);

  // Create the helper function.
  Function *HelperFunc = Function::Create(
      FTy, GlobalValue::InternalLinkage, Func->getName() + "_setjmp_caller");
  Func->getParent()->getFunctionList().insertAfter(Func, HelperFunc);
  BasicBlock *EntryBB = BasicBlock::Create(Func->getContext(), "", HelperFunc);
  BasicBlock *NormalBB = BasicBlock::Create(Func->getContext(), "normal",
                                            HelperFunc);
  BasicBlock *ExceptionBB = BasicBlock::Create(Func->getContext(), "exception",
                                               HelperFunc);

  // Unpack the helper function's arguments.
  Function::arg_iterator ArgIter = HelperFunc->arg_begin();
  SmallVector<Value *, 10> InnerCallArgs;
  for (unsigned I = 0, E = Invoke->getNumArgOperands(); I < E; ++I) {
    ArgIter->setName("arg");
    InnerCallArgs.push_back(ArgIter++);
  }
  Argument *CalleeArg = ArgIter++;
  Argument *JmpBufArg = ArgIter++;
  CalleeArg->setName("func_ptr");
  JmpBufArg->setName("jmp_buf");

  // Create setjmp() call.
  Value *SetjmpArgs[] = { JmpBufArg };
  CallInst *SetjmpCall = CallInst::Create(SetjmpIntrinsic, SetjmpArgs,
                                          "invoke_sj", EntryBB);
  CopyDebug(SetjmpCall, Invoke);
  // Setting the "returns_twice" attribute here prevents optimization
  // passes from inlining HelperFunc into its caller.
  SetjmpCall->setCanReturnTwice();
  // Check setjmp()'s result.
  Value *IsZero = CopyDebug(new ICmpInst(*EntryBB, CmpInst::ICMP_EQ, SetjmpCall,
                                         ConstantInt::get(I32, 0),
                                         "invoke_sj_is_zero"), Invoke);
  CopyDebug(BranchInst::Create(NormalBB, ExceptionBB, IsZero, EntryBB), Invoke);
  // Handle the normal, non-exceptional code path.
  CallInst *InnerCall = CallInst::Create(CalleeArg, InnerCallArgs, "",
                                         NormalBB);
  CopyDebug(InnerCall, Invoke);
  InnerCall->setAttributes(Invoke->getAttributes());
  InnerCall->setCallingConv(Invoke->getCallingConv());
  if (ResultAlloca) {
    InnerCall->setName("result");
    Argument *ResultArg = ArgIter++;
    ResultArg->setName("result_ptr");
    CopyDebug(new StoreInst(InnerCall, ResultArg, NormalBB), Invoke);
  }
  ReturnInst::Create(Func->getContext(), ConstantInt::get(I32, 0), NormalBB);
  // Handle the exceptional code path.
  ReturnInst::Create(Func->getContext(), ConstantInt::get(I32, 1), ExceptionBB);

  // Create the outer call to the helper function.
  SmallVector<Value *, 10> OuterCallArgs;
  for (unsigned I = 0, E = Invoke->getNumArgOperands(); I < E; ++I)
    OuterCallArgs.push_back(Invoke->getArgOperand(I));
  OuterCallArgs.push_back(Invoke->getCalledValue());
  OuterCallArgs.push_back(FrameJmpBuf);
  if (ResultAlloca)
    OuterCallArgs.push_back(ResultAlloca);
  CallInst *OuterCall = CallInst::Create(HelperFunc, OuterCallArgs,
                                         "invoke_is_exc", Invoke);
  CopyDebug(OuterCall, Invoke);

  // Retrieve the function return value stored in the alloca.  We only
  // need to do this on the non-exceptional path, but we currently do
  // it unconditionally because that is simpler.
  if (ResultAlloca) {
    Value *Result = new LoadInst(ResultAlloca, "", Invoke);
    Result->takeName(Invoke);
    Invoke->replaceAllUsesWith(Result);
  }
  return OuterCall;
}
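
Roughly, the helper that createSetjmpWrappedCall builds behaves like the
following standalone C++ sketch; the names here are illustrative stand-ins,
not identifiers the pass emits:

#include <setjmp.h>

// Hypothetical shape of HelperFunc for a callee of type int(int).
static int invoke_setjmp_caller(int arg, int (*func_ptr)(int),
                                jmp_buf *jmp_buf_ptr, int *result_ptr) {
  if (setjmp(*jmp_buf_ptr) == 0) {  // normal path: setjmp returned directly
    *result_ptr = func_ptr(arg);    // the original invoke's call
    return 0;
  }
  return 1;  // a longjmp landed here: enter the landingpad block
}

At the original invoke site, the pass then branches on the helper's return
value: zero continues to the normal destination, nonzero enters the
landingpad.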
Example #17
Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
                                               BinaryOperator &I) {
  bool isLeftShift = I.getOpcode() == Instruction::Shl;

  ConstantInt *COp1 = nullptr;
  if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(Op1))
    COp1 = dyn_cast_or_null<ConstantInt>(CV->getSplatValue());
  else if (ConstantVector *CV = dyn_cast<ConstantVector>(Op1))
    COp1 = dyn_cast_or_null<ConstantInt>(CV->getSplatValue());
  else
    COp1 = dyn_cast<ConstantInt>(Op1);

  if (!COp1)
    return nullptr;

  // See if we can propagate this shift into the input; this covers the trivial
  // case of lshr(shl(x,c1),c2) as well as other more complex cases.
  if (I.getOpcode() != Instruction::AShr &&
      CanEvaluateShifted(Op0, COp1->getZExtValue(), isLeftShift, *this)) {
    DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
              " to eliminate shift:\n  IN: " << *Op0 << "\n  SH: " << I <<"\n");

    return ReplaceInstUsesWith(I,
                 GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this));
  }

  // See if we can simplify any instructions used by the instruction whose sole
  // purpose is to compute bits we don't care about.
  uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();

  assert(!COp1->uge(TypeBits) &&
         "Shift over the type width should have been removed already");

  // ((X*C1) << C2) == (X * (C1 << C2))
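  // For example, (X*3) << 2 becomes X * 12, since 3 << 2 == 12.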
  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
    if (BO->getOpcode() == Instruction::Mul && isLeftShift)
      if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
        return BinaryOperator::CreateMul(BO->getOperand(0),
                                        ConstantExpr::getShl(BOOp, Op1));

  // Try to fold this shift into the select's arguments.
  if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
    if (Instruction *R = FoldOpIntoSelect(I, SI))
      return R;
  if (isa<PHINode>(Op0))
    if (Instruction *NV = FoldOpIntoPhi(I))
      return NV;

  // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
  if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
    Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
    // If 'shift2' is an ashr, we would have to get the sign bit into a funny
    // place.  Don't try to do this transformation in this case.  Also, we
    // require that the input operand is a shift-by-constant so that we have
    // confidence that the shifts will get folded together.  We could do this
    // xform in more cases, but it is unlikely to be profitable.
    if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
        isa<ConstantInt>(TrOp->getOperand(1))) {
      // Okay, we'll do this xform.  Construct the outer shift of the inner
      // shift.
      Constant *ShAmt = ConstantExpr::getZExt(COp1, TrOp->getType());
      // (shift2 (shift1 & 0x00FF), c2)
      Value *NSh =
          Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt, I.getName());

      // For logical shifts, the truncation has the effect of making the high
      // part of the register be zeros.  Emulate this by inserting an AND to
      // clear the top bits as needed.  This 'and' will usually be zapped by
      // other xforms later if dead.
      unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
      unsigned DstSize = TI->getType()->getScalarSizeInBits();
      APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));

      // The mask we constructed says what the trunc would do if occurring
      // between the shifts.  We want to know the effect *after* the second
      // shift.  We know that it is a logical shift by a constant, so adjust the
      // mask as appropriate.
      if (I.getOpcode() == Instruction::Shl)
        MaskV <<= COp1->getZExtValue();
      else {
        assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
        MaskV = MaskV.lshr(COp1->getZExtValue());
      }
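      // For example, truncating i32 to i8 under 'shl 4': the low-8-bits mask
      // 0x000000FF becomes 0x00000FF0 after the adjustment above.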

      // shift1 & 0x00FF
      Value *And = Builder->CreateAnd(NSh,
                                      ConstantInt::get(I.getContext(), MaskV),
                                      TI->getName());

      // Return the value truncated to the interesting size.
      return new TruncInst(And, I.getType());
    }
  }

  if (Op0->hasOneUse()) {
    if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
      // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
      Value *V1, *V2;
      ConstantInt *CC;
      switch (Op0BO->getOpcode()) {
      default: break;
      case Instruction::Add:
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor: {
        // These operators commute.
        // Turn (Y + (X >> C)) << C  ->  (X + (Y << C)) & (~0 << C)
        if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
            match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
                  m_Specific(Op1)))) {
          Value *YS =         // (Y << C)
            Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
          // (X + (Y << C))
          Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
                                          Op0BO->getOperand(1)->getName());
          uint32_t Op1Val = COp1->getLimitedValue(TypeBits);

          APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
          Constant *Mask = ConstantInt::get(I.getContext(), Bits);
          if (VectorType *VT = dyn_cast<VectorType>(X->getType()))
            Mask = ConstantVector::getSplat(VT->getNumElements(), Mask);
          return BinaryOperator::CreateAnd(X, Mask);
        }

        // Turn (Y + ((X >> C) & CC)) << C  ->  ((X & (CC << C)) + (Y << C))
        Value *Op0BOOp1 = Op0BO->getOperand(1);
        if (isLeftShift && Op0BOOp1->hasOneUse() &&
            match(Op0BOOp1,
                  m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))),
                        m_ConstantInt(CC)))) {
          Value *YS =   // (Y << C)
            Builder->CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
          // X & (CC << C)
          Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
                                         V1->getName()+".mask");
          return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
        }
      }

      // FALL THROUGH.
      case Instruction::Sub: {
        // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
        if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
            match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
                  m_Specific(Op1)))) {
          Value *YS =  // (Y << C)
            Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
          // (X + (Y << C))
          Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
                                          Op0BO->getOperand(0)->getName());
          uint32_t Op1Val = COp1->getLimitedValue(TypeBits);

          APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
          Constant *Mask = ConstantInt::get(I.getContext(), Bits);
          if (VectorType *VT = dyn_cast<VectorType>(X->getType()))
            Mask = ConstantVector::getSplat(VT->getNumElements(), Mask);
          return BinaryOperator::CreateAnd(X, Mask);
        }

        // Turn (((X >> C)&CC) + Y) << C  ->  (X + (Y << C)) & (CC << C)
        if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
            match(Op0BO->getOperand(0),
                  m_And(m_OneUse(m_Shr(m_Value(V1), m_Value(V2))),
                        m_ConstantInt(CC))) && V2 == Op1) {
          Value *YS = // (Y << C)
            Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
          // X & (CC << C)
          Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
                                         V1->getName()+".mask");

          return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
        }

        break;
      }
      }


      // If the operand is a bitwise operator (or, for left shifts, an add)
      // with a constant RHS, and the shift is its only use, we can pull the
      // operator out of the shift.
      if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
        bool isValid = true;     // Valid only for And, Or, Xor
        bool highBitSet = false; // Transform if high bit of constant set?

        switch (Op0BO->getOpcode()) {
        default: isValid = false; break;   // Do not perform transform!
        case Instruction::Add:
          isValid = isLeftShift;
          break;
        case Instruction::Or:
        case Instruction::Xor:
          highBitSet = false;
          break;
        case Instruction::And:
          highBitSet = true;
          break;
        }

        // If this is a signed shift right, and the high bit is modified
        // by the logical operation, do not perform the transformation.
        // The highBitSet boolean indicates the value of the high bit of
        // the constant which would cause it to be modified for this
        // operation.
        //
        if (isValid && I.getOpcode() == Instruction::AShr)
          isValid = Op0C->getValue()[TypeBits-1] == highBitSet;

        if (isValid) {
          Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);

          Value *NewShift =
            Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
          NewShift->takeName(Op0BO);

          return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
                                        NewRHS);
        }
      }
    }
  }

  // Find out if this is a shift of a shift by a constant.
  BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
  if (ShiftOp && !ShiftOp->isShift())
    ShiftOp = nullptr;

  if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {

    // This is a constant shift of a constant shift. Be careful about hiding
    // shl instructions behind bit masks. They are used to represent multiplies
    // by a constant, and it is important that simple arithmetic expressions
    // are still recognizable by scalar evolution.
    //
    // The transforms applied to shl are very similar to the transforms applied
    // to mul by constant. We can be more aggressive about optimizing right
    // shifts.
    //
    // Combinations of right and left shifts will still be optimized in
    // DAGCombine where scalar evolution no longer applies.

    ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
    uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
    uint32_t ShiftAmt2 = COp1->getLimitedValue(TypeBits);
    assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
    if (ShiftAmt1 == 0) return nullptr;  // Will be simplified in the future.
    Value *X = ShiftOp->getOperand(0);

    IntegerType *Ty = cast<IntegerType>(I.getType());

    // Check for (X << c1) << c2  and  (X >> c1) >> c2
    if (I.getOpcode() == ShiftOp->getOpcode()) {
      uint32_t AmtSum = ShiftAmt1+ShiftAmt2;   // Fold into one big shift.
      // If this is an oversized composite shift, then unsigned shifts get 0
      // and ashr saturates.
      if (AmtSum >= TypeBits) {
        if (I.getOpcode() != Instruction::AShr)
          return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
        AmtSum = TypeBits-1;  // Saturate to 31 for i32 ashr.
      }

      return BinaryOperator::Create(I.getOpcode(), X,
                                    ConstantInt::get(Ty, AmtSum));
    }

    if (ShiftAmt1 == ShiftAmt2) {
      // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
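      // For example, on i8, ((X << 3) >>u 3) becomes X & 0x1F.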
      if (I.getOpcode() == Instruction::LShr &&
          ShiftOp->getOpcode() == Instruction::Shl) {
        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
        return BinaryOperator::CreateAnd(X,
                                        ConstantInt::get(I.getContext(), Mask));
      }
    } else if (ShiftAmt1 < ShiftAmt2) {
      uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;

      // (X >>?,exact C1) << C2 --> X << (C2-C1)
      // The inexact version is deferred to DAGCombine so we don't hide shl
      // behind a bit mask.
      if (I.getOpcode() == Instruction::Shl &&
          ShiftOp->getOpcode() != Instruction::Shl &&
          ShiftOp->isExact()) {
        assert(ShiftOp->getOpcode() == Instruction::LShr ||
               ShiftOp->getOpcode() == Instruction::AShr);
        ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
        BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
                                                        X, ShiftDiffCst);
        NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
        NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
        return NewShl;
      }

      // (X << C1) >>u C2  --> X >>u (C2-C1) & (-1 >> C2)
      if (I.getOpcode() == Instruction::LShr &&
          ShiftOp->getOpcode() == Instruction::Shl) {
        ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
        // (X <<nuw C1) >>u C2 --> X >>u (C2-C1)
        if (ShiftOp->hasNoUnsignedWrap()) {
          BinaryOperator *NewLShr = BinaryOperator::Create(Instruction::LShr,
                                                           X, ShiftDiffCst);
          NewLShr->setIsExact(I.isExact());
          return NewLShr;
        }
        Value *Shift = Builder->CreateLShr(X, ShiftDiffCst);

        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
        return BinaryOperator::CreateAnd(Shift,
                                         ConstantInt::get(I.getContext(),Mask));
      }

      // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
      // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
      if (I.getOpcode() == Instruction::AShr &&
          ShiftOp->getOpcode() == Instruction::Shl) {
        if (ShiftOp->hasNoSignedWrap()) {
          // (X <<nsw C1) >>s C2 --> X >>s (C2-C1)
          ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
          BinaryOperator *NewAShr = BinaryOperator::Create(Instruction::AShr,
                                                           X, ShiftDiffCst);
          NewAShr->setIsExact(I.isExact());
          return NewAShr;
        }
      }
    } else {
      assert(ShiftAmt2 < ShiftAmt1);
      uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;

      // (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
      // The inexact version is deferred to DAGCombine so we don't hide shl
      // behind a bit mask.
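      // For example, (X >>u,exact 5) << 2 becomes X >>u,exact 3.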
      if (I.getOpcode() == Instruction::Shl &&
          ShiftOp->getOpcode() != Instruction::Shl &&
          ShiftOp->isExact()) {
        ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
        BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(),
                                                        X, ShiftDiffCst);
        NewShr->setIsExact(true);
        return NewShr;
      }

      // (X << C1) >>u C2  --> X << (C1-C2) & (-1 >> C2)
      if (I.getOpcode() == Instruction::LShr &&
          ShiftOp->getOpcode() == Instruction::Shl) {
        ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
        if (ShiftOp->hasNoUnsignedWrap()) {
          // (X <<nuw C1) >>u C2 --> X <<nuw (C1-C2)
          BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
                                                          X, ShiftDiffCst);
          NewShl->setHasNoUnsignedWrap(true);
          return NewShl;
        }
        Value *Shift = Builder->CreateShl(X, ShiftDiffCst);

        APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
        return BinaryOperator::CreateAnd(Shift,
                                         ConstantInt::get(I.getContext(),Mask));
      }

      // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
      // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
      if (I.getOpcode() == Instruction::AShr &&
          ShiftOp->getOpcode() == Instruction::Shl) {
        if (ShiftOp->hasNoSignedWrap()) {
          // (X <<nsw C1) >>s C2 --> X <<nsw (C1-C2)
          ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
          BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
                                                          X, ShiftDiffCst);
          NewShl->setHasNoSignedWrap(true);
          return NewShl;
        }
      }
    }
  }
  return nullptr;
}
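
As a sanity check on the composite-shift arithmetic above, here is a small
standalone C++ snippet (independent of LLVM) that mirrors three of the folds
on a 32-bit unsigned value:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xDEADBEEF;
  // Same-opcode shifts fold by adding the amounts: (X >>u 3) >>u 4 == X >>u 7.
  assert(((X >> 3) >> 4) == (X >> 7));
  // Equal amounts: ((X << 5) >>u 5) == X & (-1 >>u 5).
  assert(((X << 5) >> 5) == (X & (~0u >> 5)));
  // C1 < C2: ((X << 2) >>u 5) == (X >>u 3) & (-1 >>u 5).
  assert(((X << 2) >> 5) == ((X >> 3) & (~0u >> 5)));
  return 0;
}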
Example #18
// Insert an intrinsic for fast fdiv for safe math situations where we can
// reduce precision. Leave fdiv for situations where the generic node is
// expected to be optimized.
bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
  Type *Ty = FDiv.getType();

  // TODO: Handle half
  if (!Ty->getScalarType()->isFloatTy())
    return false;

  MDNode *FPMath = FDiv.getMetadata(LLVMContext::MD_fpmath);
  if (!FPMath)
    return false;

  const FPMathOperator *FPOp = cast<const FPMathOperator>(&FDiv);
  float ULP = FPOp->getFPAccuracy();
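  // Only relax the division if the !fpmath metadata permits at least 2.5 ulp
  // of error; tighter accuracy requests keep the generic fdiv.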
  if (ULP < 2.5f)
    return false;

  FastMathFlags FMF = FPOp->getFastMathFlags();
  bool UnsafeDiv = HasUnsafeFPMath || FMF.unsafeAlgebra() ||
                   FMF.allowReciprocal();
  if (ST->hasFP32Denormals() && !UnsafeDiv)
    return false;

  IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()), FPMath);
  Builder.setFastMathFlags(FMF);
  Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());

  const AMDGPUIntrinsicInfo *II = TM->getIntrinsicInfo();
  Function *Decl
    = II->getDeclaration(Mod, AMDGPUIntrinsic::amdgcn_fdiv_fast, {});

  Value *Num = FDiv.getOperand(0);
  Value *Den = FDiv.getOperand(1);

  Value *NewFDiv = nullptr;

  if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
    NewFDiv = UndefValue::get(VT);

    // FIXME: Doesn't do the right thing for cases where the vector is partially
    // constant. This works when the scalarizer pass is run first.
    for (unsigned I = 0, E = VT->getNumElements(); I != E; ++I) {
      Value *NumEltI = Builder.CreateExtractElement(Num, I);
      Value *DenEltI = Builder.CreateExtractElement(Den, I);
      Value *NewElt;

      if (shouldKeepFDivF32(NumEltI, UnsafeDiv)) {
        NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
      } else {
        NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI });
      }

      NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I);
    }
  } else {
    if (!shouldKeepFDivF32(Num, UnsafeDiv))
      NewFDiv = Builder.CreateCall(Decl, { Num, Den });
  }

  if (NewFDiv) {
    FDiv.replaceAllUsesWith(NewFDiv);
    NewFDiv->takeName(&FDiv);
    FDiv.eraseFromParent();
  }

  return true;
}
Example #19
/// doPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function.  At this point, we know that it's
/// safe to do so.
static Function *
doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
            SmallPtrSetImpl<Argument *> &ByValArgsToTransform,
            Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>>
                ReplaceCallSite) {
  // Start by computing a new prototype for the function, which is the same as
  // the old function, but has modified arguments.
  FunctionType *FTy = F->getFunctionType();
  std::vector<Type *> Params;

  using ScalarizeTable = std::set<std::pair<Type *, IndicesVector>>;

  // ScalarizedElements - If we are promoting a pointer that has elements
  // accessed out of it, keep track of which elements are accessed so that we
  // can add one argument for each.
  //
  // Arguments that are directly loaded are recorded with an empty indices
  // vector here, to handle cases where there are both a direct load and GEP
  // accesses.
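  //
  // For example, an argument 'i32* %p' that is only read via 'load %p' is
  // rebuilt as a plain 'i32 %p.val' parameter in the new function.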
  std::map<Argument *, ScalarizeTable> ScalarizedElements;

  // OriginalLoads - Keep track of a representative load instruction from the
  // original function so that we can tell the alias analysis implementation
  // what the new GEP/Load instructions we are inserting look like.
  // We need to keep the original loads for each argument and the elements
  // of the argument that are accessed.
  std::map<std::pair<Argument *, IndicesVector>, LoadInst *> OriginalLoads;

  // Attribute - Keep track of the parameter attributes for the arguments
  // that we are *not* promoting. For the ones that we do promote, the
  // parameter attributes are lost.
  SmallVector<AttributeSet, 8> ArgAttrVec;
  AttributeList PAL = F->getAttributes();

  // First, determine the new argument list
  unsigned ArgNo = 0;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
       ++I, ++ArgNo) {
    if (ByValArgsToTransform.count(&*I)) {
      // Simple byval argument? Just add all the struct element types.
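      // For example, a byval '{ i32, float }*' argument contributes two new
      // parameters: one i32 and one float.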
      Type *AgTy = cast<PointerType>(I->getType())->getElementType();
      StructType *STy = cast<StructType>(AgTy);
      Params.insert(Params.end(), STy->element_begin(), STy->element_end());
      ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(),
                        AttributeSet());
      ++NumByValArgsPromoted;
    } else if (!ArgsToPromote.count(&*I)) {
      // Unchanged argument
      Params.push_back(I->getType());
      ArgAttrVec.push_back(PAL.getParamAttributes(ArgNo));
    } else if (I->use_empty()) {
      // Dead argument (these are always marked as promotable).
      ++NumArgumentsDead;

      // There may be remaining metadata uses of the argument for things like
      // llvm.dbg.value. Replace them with undef.
      I->replaceAllUsesWith(UndefValue::get(I->getType()));
    } else {
      // Okay, this is being promoted. This means that the only uses are loads
      // or GEPs that are themselves only used by loads.

      // In this table, we will track which indices are loaded from the argument
      // (where direct loads are tracked as no indices).
      ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
      for (User *U : I->users()) {
        Instruction *UI = cast<Instruction>(U);
        Type *SrcTy;
        if (LoadInst *L = dyn_cast<LoadInst>(UI))
          SrcTy = L->getType();
        else
          SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
        IndicesVector Indices;
        Indices.reserve(UI->getNumOperands() - 1);
        // Since loads will only have a single operand, and GEPs only a single
        // non-index operand, this will record direct loads without any indices,
        // and gep+loads with the GEP indices.
        for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end();
             II != IE; ++II)
          Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
        // GEPs with a single 0 index can be merged with direct loads
        if (Indices.size() == 1 && Indices.front() == 0)
          Indices.clear();
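        // So a direct load records {}, 'gep %p, 0' also records {} after the
        // merge above, and 'gep %p, 0, 1' records {0, 1}.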
        ArgIndices.insert(std::make_pair(SrcTy, Indices));
        LoadInst *OrigLoad;
        if (LoadInst *L = dyn_cast<LoadInst>(UI))
          OrigLoad = L;
        else
          // Take any load; we only use it to update the alias analysis.
          OrigLoad = cast<LoadInst>(UI->user_back());
        OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad;
      }

      // Add a parameter to the function for each element passed in.
      for (const auto &ArgIndex : ArgIndices) {
        // Not allowed to dereference ->begin() when size() is 0.
        Params.push_back(GetElementPtrInst::getIndexedType(
            cast<PointerType>(I->getType()->getScalarType())->getElementType(),
            ArgIndex.second));
        ArgAttrVec.push_back(AttributeSet());
        assert(Params.back());
      }

      if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty())
        ++NumArgumentsPromoted;
      else
        ++NumAggregatesPromoted;
    }
  }

  Type *RetTy = FTy->getReturnType();

  // Construct the new function type using the new arguments.
  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());

  // Create the new function body and insert it into the module.
  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
  NF->copyAttributesFrom(F);

  // Patch the pointer to LLVM function in debug info descriptor.
  NF->setSubprogram(F->getSubprogram());
  F->setSubprogram(nullptr);

  DEBUG(dbgs() << "ARG PROMOTION:  Promoting to:" << *NF << "\n"
               << "From: " << *F);

  // Recompute the parameter attributes list based on the new arguments for
  // the function.
  NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttributes(),
                                       PAL.getRetAttributes(), ArgAttrVec));
  ArgAttrVec.clear();

  F->getParent()->getFunctionList().insert(F->getIterator(), NF);
  NF->takeName(F);

  // Loop over all of the callers of the function, transforming the call sites
  // to pass in the loaded pointers.
  //
  SmallVector<Value *, 16> Args;
  while (!F->use_empty()) {
    CallSite CS(F->user_back());
    assert(CS.getCalledFunction() == F);
    Instruction *Call = CS.getInstruction();
    const AttributeList &CallPAL = CS.getAttributes();

    // Loop over the operands, inserting GEP and loads in the caller as
    // appropriate.
    CallSite::arg_iterator AI = CS.arg_begin();
    ArgNo = 0;
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
         ++I, ++AI, ++ArgNo)
      if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
        Args.push_back(*AI); // Unmodified argument
        ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
      } else if (ByValArgsToTransform.count(&*I)) {
        // Emit a GEP and load for each element of the struct.
        Type *AgTy = cast<PointerType>(I->getType())->getElementType();
        StructType *STy = cast<StructType>(AgTy);
        Value *Idxs[2] = {
            ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr};
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
          Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
          Value *Idx = GetElementPtrInst::Create(
              STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call);
          // TODO: Tell AA about the new values?
          Args.push_back(new LoadInst(Idx, Idx->getName() + ".val", Call));
          ArgAttrVec.push_back(AttributeSet());
        }
      } else if (!I->use_empty()) {
        // Non-dead argument: insert GEPs and loads as appropriate.
        ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
        // Store the Value* version of the indices in here, but declare it now
        // for reuse.
        std::vector<Value *> Ops;
        for (const auto &ArgIndex : ArgIndices) {
          Value *V = *AI;
          LoadInst *OrigLoad =
              OriginalLoads[std::make_pair(&*I, ArgIndex.second)];
          if (!ArgIndex.second.empty()) {
            Ops.reserve(ArgIndex.second.size());
            Type *ElTy = V->getType();
            for (auto II : ArgIndex.second) {
              // Use i32 to index structs, and i64 for others (pointers/arrays).
              // This satisfies GEP constraints.
              Type *IdxTy =
                  (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext())
                                      : Type::getInt64Ty(F->getContext()));
              Ops.push_back(ConstantInt::get(IdxTy, II));
              // Keep track of the type we're currently indexing.
              if (auto *ElPTy = dyn_cast<PointerType>(ElTy))
                ElTy = ElPTy->getElementType();
              else
                ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II);
            }
            // And create a GEP to extract those indices.
            V = GetElementPtrInst::Create(ArgIndex.first, V, Ops,
                                          V->getName() + ".idx", Call);
            Ops.clear();
          }
          // Since we're replacing a load make sure we take the alignment
          // of the previous load.
          LoadInst *newLoad = new LoadInst(V, V->getName() + ".val", Call);
          newLoad->setAlignment(OrigLoad->getAlignment());
          // Transfer the AA info too.
          AAMDNodes AAInfo;
          OrigLoad->getAAMetadata(AAInfo);
          newLoad->setAAMetadata(AAInfo);

          Args.push_back(newLoad);
          ArgAttrVec.push_back(AttributeSet());
        }
      }

    // Push any varargs arguments on the list.
    for (; AI != CS.arg_end(); ++AI, ++ArgNo) {
      Args.push_back(*AI);
      ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
    }

    SmallVector<OperandBundleDef, 1> OpBundles;
    CS.getOperandBundlesAsDefs(OpBundles);

    CallSite NewCS;
    if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
      NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                                 Args, OpBundles, "", Call);
    } else {
      auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", Call);
      NewCall->setTailCallKind(cast<CallInst>(Call)->getTailCallKind());
      NewCS = NewCall;
    }
    NewCS.setCallingConv(CS.getCallingConv());
    NewCS.setAttributes(
        AttributeList::get(F->getContext(), CallPAL.getFnAttributes(),
                           CallPAL.getRetAttributes(), ArgAttrVec));
    NewCS->setDebugLoc(Call->getDebugLoc());
    uint64_t W;
    if (Call->extractProfTotalWeight(W))
      NewCS->setProfWeight(W);
    Args.clear();
    ArgAttrVec.clear();

    // Update the callgraph to know that the callsite has been transformed.
    if (ReplaceCallSite)
      (*ReplaceCallSite)(CS, NewCS);

    if (!Call->use_empty()) {
      Call->replaceAllUsesWith(NewCS.getInstruction());
      NewCS->takeName(Call);
    }

    // Finally, remove the old call from the program, reducing the use-count of
    // F.
    Call->eraseFromParent();
  }

  const DataLayout &DL = F->getParent()->getDataLayout();

  // Since we have now created the new function, splice the body of the old
  // function right into the new function, leaving the old rotting hulk of the
  // function empty.
  NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());

  // Loop over the argument list, transferring uses of the old arguments over to
  // the new arguments, also transferring over the names as well.
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
                              I2 = NF->arg_begin();
       I != E; ++I) {
    if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
      // If this is an unmodified argument, move the name and users over to the
      // new version.
      I->replaceAllUsesWith(&*I2);
      I2->takeName(&*I);
      ++I2;
      continue;
    }

    if (ByValArgsToTransform.count(&*I)) {
      // In the callee, we create an alloca, and store each of the new incoming
      // arguments into the alloca.
      Instruction *InsertPt = &NF->begin()->front();

      // Just add all the struct element types.
      Type *AgTy = cast<PointerType>(I->getType())->getElementType();
      Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
                                        I->getParamAlignment(), "", InsertPt);
      StructType *STy = cast<StructType>(AgTy);
      Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0),
                        nullptr};

      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
        Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
        Value *Idx = GetElementPtrInst::Create(
            AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
            InsertPt);
        I2->setName(I->getName() + "." + Twine(i));
        new StoreInst(&*I2++, Idx, InsertPt);
      }

      // Anything that used the arg should now use the alloca.
      I->replaceAllUsesWith(TheAlloca);
      TheAlloca->takeName(&*I);

      // If the alloca is used in a call, we must clear the tail flag since
      // the callee now uses an alloca from the caller.
      for (User *U : TheAlloca->users()) {
        CallInst *Call = dyn_cast<CallInst>(U);
        if (!Call)
          continue;
        Call->setTailCall(false);
      }
      continue;
    }

    if (I->use_empty())
      continue;

    // Otherwise, if we promoted this argument, then all users are load
    // instructions (or GEPs with only load users), and all loads should be
    // using the new argument that we added.
    ScalarizeTable &ArgIndices = ScalarizedElements[&*I];

    while (!I->use_empty()) {
      if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
        assert(ArgIndices.begin()->second.empty() &&
               "Load element should sort to front!");
        I2->setName(I->getName() + ".val");
        LI->replaceAllUsesWith(&*I2);
        LI->eraseFromParent();
        DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
                     << "' in function '" << F->getName() << "'\n");
      } else {
        GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
        IndicesVector Operands;
        Operands.reserve(GEP->getNumIndices());
        for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
             II != IE; ++II)
          Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());

        // GEPs with a single 0 index can be merged with direct loads
        if (Operands.size() == 1 && Operands.front() == 0)
          Operands.clear();

        Function::arg_iterator TheArg = I2;
        for (ScalarizeTable::iterator It = ArgIndices.begin();
             It->second != Operands; ++It, ++TheArg) {
          assert(It != ArgIndices.end() && "GEP not handled??");
        }

        std::string NewName = I->getName();
        for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
          NewName += "." + utostr(Operands[i]);
        }
        NewName += ".val";
        TheArg->setName(NewName);

        DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
                     << "' of function '" << NF->getName() << "'\n");

        // All of the uses must be load instructions.  Replace them all with
        // the argument specified by ArgNo.
        while (!GEP->use_empty()) {
          LoadInst *L = cast<LoadInst>(GEP->user_back());
          L->replaceAllUsesWith(&*TheArg);
          L->eraseFromParent();
        }
        GEP->eraseFromParent();
      }
    }

    // Increment I2 past all of the arguments added for this promoted pointer.
    std::advance(I2, ArgIndices.size());
  }

  return NF;
}
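
A minimal sketch of how a caller might drive doPromotion once analysis has
decided what is promotable; the driver function and set sizes below are
illustrative assumptions, not part of the pass as shown above:

// Hypothetical driver: the real pass fills both sets via per-argument
// safety checks before rewriting anything.
static Function *promoteArguments(Function *F) {
  SmallPtrSet<Argument *, 8> ArgsToPromote;
  SmallPtrSet<Argument *, 8> ByValArgsToTransform;
  // ... analysis populates ArgsToPromote / ByValArgsToTransform ...
  return doPromotion(F, ArgsToPromote, ByValArgsToTransform,
                     None /* no call-site replacement callback */);
}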