Example #1
CallSite
ABICallSignature::emitUnmanagedCall(GenIR &Reader, Value *Target, bool MayThrow,
                                    ArrayRef<Value *> Arguments) const {
  const LLILCJitContext &JitContext = *Reader.JitContext;
  const struct CORINFO_EE_INFO::InlinedCallFrameInfo &CallFrameInfo =
      JitContext.EEInfo.inlinedCallFrameInfo;
  LLVMContext &LLVMContext = *JitContext.LLVMContext;
  Type *Int8Ty = Type::getInt8Ty(LLVMContext);
  Type *Int32Ty = Type::getInt32Ty(LLVMContext);
  Type *Int8PtrTy = Reader.getUnmanagedPointerType(Int8Ty);
  IRBuilder<> &Builder = *Reader.LLVMBuilder;

  Reader.insertIRForUnmanagedCallFrame();

  Value *CallFrame = Reader.UnmanagedCallFrame;
  Value *Thread = Reader.ThreadPointer;
  assert(CallFrame != nullptr);
  assert(Thread != nullptr);

  // Set m_pDatum if necessary
  //
  // TODO: this needs to be updated for direct unmanaged calls, which require
  //       the target method handle instead of the stub secret parameter.
  if (Reader.MethodSignature.hasSecretParameter()) {
    Value *SecretParameter = Reader.secretParam();
    Value *CallTargetAddress =
        getFieldAddress(Builder, CallFrame, CallFrameInfo.offsetOfCallTarget,
                        SecretParameter->getType());
    Builder.CreateStore(SecretParameter, CallTargetAddress);
  }

  // Push the unmanaged call frame
  Value *FrameVPtr = getFieldAddress(Builder, CallFrame,
                                     CallFrameInfo.offsetOfFrameVptr, Int8Ty);
  Value *ThreadBase = Builder.CreateLoad(Thread);
  Value *ThreadFrameAddress = getFieldAddress(
      Builder, ThreadBase, JitContext.EEInfo.offsetOfThreadFrame, Int8PtrTy);
  Builder.CreateStore(FrameVPtr, ThreadFrameAddress);

  // Compute the address of the return address field
  Value *ReturnAddressAddress = getFieldAddress(
      Builder, CallFrame, CallFrameInfo.offsetOfReturnAddress, Int8PtrTy);

  // Compute the address of the GC mode field
  Value *GCStateAddress = getFieldAddress(
      Builder, ThreadBase, JitContext.EEInfo.offsetOfGCState, Int8Ty);

  // Compute address of the thread trap field
  Value *ThreadTrapAddress = nullptr;
  Type *ThreadTrapAddressTy = Reader.getUnmanagedPointerType(Int32Ty);
  void *IndirectAddrOfCaptureThreadGlobal = nullptr;
  void *AddrOfCaptureThreadGlobal =
      (void *)JitContext.JitInfo->getAddrOfCaptureThreadGlobal(
          &IndirectAddrOfCaptureThreadGlobal);
  void *AddrOfCaptureThreadHandle;
  bool IsIndirect;
  const bool IsReadOnly = true;
  const bool IsRelocatable = true;
  const bool IsCallTarget = false;
  if (AddrOfCaptureThreadGlobal != nullptr) {
    AddrOfCaptureThreadHandle = AddrOfCaptureThreadGlobal;
    IsIndirect = false;
  } else {
    AddrOfCaptureThreadHandle = IndirectAddrOfCaptureThreadGlobal;
    IsIndirect = true;
  }
  Value *RawThreadTrapAddress =
      Reader.handleToIRNode(mdtCaptureThreadGlobal, AddrOfCaptureThreadHandle,
                            AddrOfCaptureThreadHandle, IsIndirect, IsReadOnly,
                            IsRelocatable, IsCallTarget);
  ThreadTrapAddress =
      Builder.CreateIntToPtr(RawThreadTrapAddress, ThreadTrapAddressTy);

  // Compute address of GC pause helper
  Value *PauseHelperAddress =
      (Value *)Reader.getHelperCallAddress(CORINFO_HELP_STOP_FOR_GC);

  // Construct the call.
  //
  // The transition args are:
  // 0) Address of the return address field
  // 1) Address of the GC mode field
  // 2) Address of the thread trap global
  // 3) Address of CORINFO_HELP_STOP_FOR_GC
  Value *TransitionArgs[] = {ReturnAddressAddress, GCStateAddress,
                             ThreadTrapAddress, PauseHelperAddress};
  OperandBundleDef TransitionBundle("gc-transition", TransitionArgs);
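  // Schematically (types here are illustrative), the call built below carries
  // the transition arguments as an operand bundle:
  //   %r = call i32 %Target(...) [ "gc-transition"(i8** %RetAddrAddr,
  //            i8* %GCStateAddr, i32* %ThreadTrapAddr, i8* %PauseHelper) ]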

  CallSite Call =
      Reader.makeCall(Target, MayThrow, Arguments, {TransitionBundle});
  assert(Call.getOperandBundle(LLVMContext::OB_gc_transition).hasValue() &&
         "tag string mismatch?");

  // Deactivate the unmanaged call frame
  Builder.CreateStore(Constant::getNullValue(Int8PtrTy), ReturnAddressAddress);

  // Pop the unmanaged call frame
  Value *FrameLinkAddress = getFieldAddress(
      Builder, CallFrame, CallFrameInfo.offsetOfFrameLink, Int8PtrTy);
  Value *FrameLink = Builder.CreateLoad(FrameLinkAddress);
  Builder.CreateStore(FrameLink, ThreadFrameAddress);

  return Call;
}
Example #2
Value GranularityRounderPreferredNumbers::roundDown(Value value) {
    uassertNonNegativeNumber(value);

    if (value.coerceToDouble() == 0.0) {
        return value;
    }

    if (value.getType() == BSONType::NumberDecimal) {
        Decimal128 number = value.getDecimal();
        Decimal128 multiplier = Decimal128(1);

        // '_baseSeries' contains doubles, so we create a vector that contains the Decimal128
        // versions of the numbers in '_baseSeries' to make it easier to compare values to 'number'.
        vector<Decimal128> decimalSeries;
        for (auto&& doubleNumber : _baseSeries) {
            decimalSeries.push_back(Decimal128(doubleNumber));
        }

        while (number.isLessEqual(decimalSeries.front().multiply(multiplier))) {
            multiplier = multiplier.divide(Decimal128(10));
        }

        Decimal128 previousMax;
        while (number.isGreater(decimalSeries.back().multiply(multiplier))) {
            previousMax = decimalSeries.back().multiply(multiplier);
            multiplier = multiplier.multiply(Decimal128(10));
            if (number.isLessEqual(decimalSeries.front().multiply(multiplier))) {
                // The number is less than or equal to the current min, so it must round down to the
                // previous max. For example, rounding down 0.8 in the E6 series.
                return Value(previousMax);
            }
        }

        // After scaling up or down, 'number' should now fall strictly above the
        // scaled series minimum and at or below the scaled series maximum.
        invariant(number.isGreater(decimalSeries.front().multiply(multiplier)) &&
                  number.isLessEqual(decimalSeries.back().multiply(multiplier)));

        // Get an iterator pointing to the first element in 'decimalSeries'
        // whose scaled value is greater than or equal to 'number'.
        auto iterator =
            std::lower_bound(decimalSeries.begin(),
                             decimalSeries.end(),
                             number,
                             [multiplier](Decimal128 seriesNumber, Decimal128 roundingNumber) {
                                 return seriesNumber.multiply(multiplier).isLess(roundingNumber);
                             });

        // We need to move the iterator back by one so that we round down to a number that is
        // strictly less than the value we are rounding.
        return Value((*(iterator - 1)).multiply(multiplier));
    } else {
        double number = value.coerceToDouble();
        double multiplier = 1.0;

        while (number <= (_baseSeries.front() * multiplier)) {
            multiplier /= 10.0;
        }

        double previousMax;
        while (number > (_baseSeries.back() * multiplier)) {
            previousMax = _baseSeries.back() * multiplier;
            multiplier *= 10.0;
            if (number <= _baseSeries.front() * multiplier) {
                // The number is less than or equal to the current min, so it must round down to the
                // previous max. For example, rounding down 0.8 in the E6 series.
                return Value(previousMax);
            }
        }

        // After scaling up or down, 'number' should now fall strictly above the
        // scaled series minimum and at or below the scaled series maximum.
        invariant(number > (_baseSeries.front() * multiplier) &&
                  number <= (_baseSeries.back() * multiplier));

        // Get an iterator pointing to the first element in '_baseSeries' whose
        // scaled value is greater than or equal to 'number'.
        auto iterator = std::lower_bound(_baseSeries.begin(),
                                         _baseSeries.end(),
                                         number,
                                         [multiplier](double seriesNumber, double roundingNumber) {
                                             return (seriesNumber * multiplier) < roundingNumber;
                                         });

        // We need to move the iterator back by one so that we round down to a number that is
        // strictly less than the value we are rounding.
        return Value(*(iterator - 1) * multiplier);
    }
}
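The same bracket-then-search scheme, as a minimal standalone sketch for
doubles (the 1-2-5 series, the function name, and the bare asserts are
illustrative assumptions, not the MongoDB API):

#include <algorithm>
#include <cassert>
#include <vector>

double roundDownPreferred(double number, const std::vector<double>& series) {
    assert(!series.empty() && number > 0.0);
    double multiplier = 1.0;
    // Scale down until the series minimum drops strictly below 'number'.
    while (number <= series.front() * multiplier)
        multiplier /= 10.0;
    // Scale up until the series maximum reaches 'number', catching values in
    // the gap between two scales (e.g. 7 in a 1-2-5 series rounds down to 5).
    double previousMax = 0.0;
    while (number > series.back() * multiplier) {
        previousMax = series.back() * multiplier;
        multiplier *= 10.0;
        if (number <= series.front() * multiplier)
            return previousMax;
    }
    // First scaled element >= 'number'; the element before it is the answer.
    auto it = std::lower_bound(series.begin(), series.end(), number,
                               [multiplier](double s, double n) {
                                   return s * multiplier < n;
                               });
    return *(it - 1) * multiplier;
}
// e.g. roundDownPreferred(3.7, {1.0, 2.0, 5.0}) == 2.0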
Example #3
CallSite ABICallSignature::emitUnmanagedCall(GenIR &Reader, Value *Target,
                                             bool MayThrow,
                                             ArrayRef<Value *> Arguments,
                                             Value *&Result) const {
  const LLILCJitContext &JitContext = *Reader.JitContext;
  const struct CORINFO_EE_INFO::InlinedCallFrameInfo &CallFrameInfo =
      JitContext.EEInfo.inlinedCallFrameInfo;
  LLVMContext &LLVMContext = *JitContext.LLVMContext;
  Type *Int8Ty = Type::getInt8Ty(LLVMContext);
  Type *Int32Ty = Type::getInt32Ty(LLVMContext);
  Type *Int64Ty = Type::getInt64Ty(LLVMContext);
  Type *Int8PtrTy = Reader.getUnmanagedPointerType(Int8Ty);
  IRBuilder<> &Builder = *Reader.LLVMBuilder;

  Reader.insertIRForUnmanagedCallFrame();

  Value *CallFrame = Reader.UnmanagedCallFrame;
  Value *Thread = Reader.ThreadPointer;
  assert(CallFrame != nullptr);
  assert(Thread != nullptr);

  // Set m_pDatum if necessary
  //
  // TODO: this needs to be updated for direct unmanaged calls, which require
  //       the target method handle instead of the stub secret parameter.
  if (Reader.MethodSignature.hasSecretParameter()) {
    Value *SecretParameter = Reader.secretParam();
    Value *CallTargetAddress =
        getFieldAddress(Builder, CallFrame, CallFrameInfo.offsetOfCallTarget,
                        SecretParameter->getType());
    Builder.CreateStore(SecretParameter, CallTargetAddress);
  }

  // Push the unmanaged call frame
  Value *FrameVPtr = getFieldAddress(Builder, CallFrame,
                                     CallFrameInfo.offsetOfFrameVptr, Int8Ty);
  Value *ThreadBase = Builder.CreateLoad(Thread);
  Value *ThreadFrameAddress = getFieldAddress(
      Builder, ThreadBase, JitContext.EEInfo.offsetOfThreadFrame, Int8PtrTy);
  Builder.CreateStore(FrameVPtr, ThreadFrameAddress);

  // Compute the address of the return address field
  Value *ReturnAddressAddress = getFieldAddress(
      Builder, CallFrame, CallFrameInfo.offsetOfReturnAddress, Int8PtrTy);

  // Compute the address of the GC mode field
  Value *GCStateAddress = getFieldAddress(
      Builder, ThreadBase, JitContext.EEInfo.offsetOfGCState, Int8Ty);

  // Compute address of the thread trap field
  Value *ThreadTrapAddress = nullptr;
  Type *ThreadTrapAddressTy = Reader.getUnmanagedPointerType(Int32Ty);
  void *IndirectAddrOfCaptureThreadGlobal = nullptr;
  void *AddrOfCaptureThreadGlobal =
      (void *)JitContext.JitInfo->getAddrOfCaptureThreadGlobal(
          &IndirectAddrOfCaptureThreadGlobal);
  void *AddrOfCaptureThreadHandle;
  bool IsIndirect;
  const bool IsReadOnly = true;
  const bool IsRelocatable = true;
  const bool IsCallTarget = false;
  if (AddrOfCaptureThreadGlobal != nullptr) {
    AddrOfCaptureThreadHandle = AddrOfCaptureThreadGlobal;
    IsIndirect = false;
  } else {
    AddrOfCaptureThreadHandle = IndirectAddrOfCaptureThreadGlobal;
    IsIndirect = true;
  }
  Value *RawThreadTrapAddress =
      Reader.handleToIRNode(mdtCaptureThreadGlobal, AddrOfCaptureThreadHandle,
                            AddrOfCaptureThreadHandle, IsIndirect, IsReadOnly,
                            IsRelocatable, IsCallTarget);
  ThreadTrapAddress =
      Builder.CreateIntToPtr(RawThreadTrapAddress, ThreadTrapAddressTy);

  // Compute address of GC pause helper
  Value *PauseHelperAddress =
      (Value *)Reader.getHelperCallAddress(CORINFO_HELP_STOP_FOR_GC);

  // Construct the call.
  //
  // The signature of the intrinsic is:
  // @llvm.experimental_gc_transition(
  //   fn_ptr target,
  //   i32 numCallArgs,
  //   i32 flags,
  //   ... call args ...,
  //   i32 numTransitionArgs,
  //   ... transition args...,
  //   i32 numDeoptArgs,
  //   ... deopt args...)
  //
  // In the case of CoreCLR, there are 4 transition args and 0 deopt args.
  //
  // The transition args are:
  // 0) Address of the return address field
  // 1) Address of the GC mode field
  // 2) Address of the thread trap global
  // 3) Address of CORINFO_HELP_STOP_FOR_GC
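  //
  // Schematically (intrinsic mangling and exact types vary by LLVM version):
  //   %token = call @llvm.experimental.gc.statepoint(
  //       i64 0 /* ID */, i32 0 /* nop bytes */, fnty* %target,
  //       i32 N /* call args */, i32 1 /* StatepointFlags::GCTransition */,
  //       <N call args>, i32 4, <4 transition args>, i32 0 /* deopt args */)
  //   %result = call @llvm.experimental.gc.result(%token)  ; if non-void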
  Module *M = Reader.Function->getParent();
  Type *CallTypeArgs[] = {Target->getType()};
  Function *CallIntrinsic = Intrinsic::getDeclaration(
      M, Intrinsic::experimental_gc_statepoint, CallTypeArgs);

  const uint32_t PrefixArgCount = 5;
  const uint32_t TransitionArgCount = 4;
  const uint32_t PostfixArgCount = TransitionArgCount + 2;
  const uint32_t TargetArgCount = Arguments.size();
  SmallVector<Value *, 24> IntrinsicArgs(PrefixArgCount + TargetArgCount +
                                         PostfixArgCount);

  // ID, nop bytes, call target and target arguments
  IntrinsicArgs[0] = ConstantInt::get(Int64Ty, 0);
  IntrinsicArgs[1] = ConstantInt::get(Int32Ty, 0);
  IntrinsicArgs[2] = Target;
  IntrinsicArgs[3] = ConstantInt::get(Int32Ty, TargetArgCount);
  IntrinsicArgs[4] =
      ConstantInt::get(Int32Ty, (uint32_t)StatepointFlags::GCTransition);

  uint32_t I, J;
  for (I = 0, J = PrefixArgCount; I < TargetArgCount; I++, J++) {
    IntrinsicArgs[J] = Arguments[I];
  }

  // GC transition arguments
  IntrinsicArgs[J] = ConstantInt::get(Int32Ty, TransitionArgCount);
  IntrinsicArgs[J + 1] = ReturnAddressAddress;
  IntrinsicArgs[J + 2] = GCStateAddress;
  IntrinsicArgs[J + 3] = ThreadTrapAddress;
  IntrinsicArgs[J + 4] = PauseHelperAddress;

  // Deopt arguments
  IntrinsicArgs[J + 5] = ConstantInt::get(Int32Ty, 0);

  CallSite Call = Reader.makeCall(CallIntrinsic, MayThrow, IntrinsicArgs);

  // Get the call result if necessary. The unmanaged return type is recovered
  // from the target's function pointer type.
  Type *FuncResultType =
      cast<FunctionType>(cast<PointerType>(Target->getType())->getElementType())
          ->getReturnType();
  if (!FuncResultType->isVoidTy()) {
    Type *ResultTypeArgs[] = {FuncResultType};
    Function *ResultIntrinsic = Intrinsic::getDeclaration(
        M, Intrinsic::experimental_gc_result, ResultTypeArgs);
    Result = Builder.CreateCall(ResultIntrinsic, Call.getInstruction());
  }

  // Deactivate the unmanaged call frame
  Builder.CreateStore(Constant::getNullValue(Int8PtrTy), ReturnAddressAddress);

  // Pop the unmanaged call frame
  Value *FrameLinkAddress = getFieldAddress(
      Builder, CallFrame, CallFrameInfo.offsetOfFrameLink, Int8PtrTy);
  Value *FrameLink = Builder.CreateLoad(FrameLinkAddress);
  Builder.CreateStore(FrameLink, ThreadFrameAddress);

  return Call;
}
Example #4
/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
/// when possible.  This makes it generally easy to do alias analysis and/or
/// SROA/mem2reg of the memory object.
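///
/// For illustration (schematic IR, assuming 32-bit pointers so the sizes
/// match), the fold turns
///   %c = bitcast i8** %P to i32*
///   store i32 %v, i32* %c
/// into
///   %v.c = inttoptr i32 %v to i8*
///   store i8* %v.c, i8** %P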
static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
  User *CI = cast<User>(SI.getOperand(1));
  Value *CastOp = CI->getOperand(0);

  const Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
  const PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
  if (SrcTy == 0) return 0;
  
  const Type *SrcPTy = SrcTy->getElementType();

  if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
    return 0;
  
  /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
  /// to its first element.  This allows us to handle things like:
  ///   store i32 xxx, (bitcast {foo*, float}* %P to i32*)
  /// on 32-bit hosts.
  SmallVector<Value*, 4> NewGEPIndices;
  
  // If the source is an array, the code below will not succeed.  Check to
  // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
  // constants.
  if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
    // Index through pointer.
    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
    NewGEPIndices.push_back(Zero);
    
    while (1) {
      if (const StructType *STy = dyn_cast<StructType>(SrcPTy)) {
        if (!STy->getNumElements()) /* Struct can be empty {} */
          break;
        NewGEPIndices.push_back(Zero);
        SrcPTy = STy->getElementType(0);
      } else if (const ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
        NewGEPIndices.push_back(Zero);
        SrcPTy = ATy->getElementType();
      } else {
        break;
      }
    }
    
    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
  }

  if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
    return 0;
  
  // If the pointers point into different address spaces or if they point to
  // values with different sizes, we can't do the transformation.
  if (!IC.getTargetData() ||
      SrcTy->getAddressSpace() != 
        cast<PointerType>(CI->getType())->getAddressSpace() ||
      IC.getTargetData()->getTypeSizeInBits(SrcPTy) !=
      IC.getTargetData()->getTypeSizeInBits(DestPTy))
    return 0;

  // Okay, we are casting from one integer or pointer type to another of
  // the same size.  Instead of casting the pointer before 
  // the store, cast the value to be stored.
  Value *NewCast;
  Value *SIOp0 = SI.getOperand(0);
  Instruction::CastOps opcode = Instruction::BitCast;
  const Type* CastSrcTy = SIOp0->getType();
  const Type* CastDstTy = SrcPTy;
  if (CastDstTy->isPointerTy()) {
    if (CastSrcTy->isIntegerTy())
      opcode = Instruction::IntToPtr;
  } else if (CastDstTy->isIntegerTy()) {
    if (SIOp0->getType()->isPointerTy())
      opcode = Instruction::PtrToInt;
  }
  
  // SIOp0 is a pointer to aggregate and this is a store to the first field,
  // emit a GEP to index into its first field.
  if (!NewGEPIndices.empty())
    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices.begin(),
                                           NewGEPIndices.end());
  
  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
                                   SIOp0->getName()+".c");
  SI.setOperand(0, NewCast);
  SI.setOperand(1, CastOp);
  return &SI;
}
Example #5
/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2 }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();
  
  // Check to see if the successor block has exactly two incoming edges.  If
  // so, see if the other predecessor contains a store to the same location.
  // if so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
  
  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *P = *PI;
  BasicBlock *OtherBB = 0;

  if (P != StoreBB)
    OtherBB = P;

  if (++PI == pred_end(DestBB))
    return false;
  
  P = *PI;
  if (P != StoreBB) {
    if (OtherBB)
      return false;
    OtherBB = P;
  }
  if (++PI != pred_end(DestBB))
    return false;

  // Bail out if all the relevant blocks aren't distinct (this can happen,
  // for example, if SI is in an infinite loop)
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI = OtherBB->getTerminator();
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;
  
  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case: there should be a store just before the branch.
  StoreInst *OtherStore = 0;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info.
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI==OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or if the
    // alignments differ, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        OtherStore->getAlignment() != SI.getAlignment())
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of the
    // destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB && 
        OtherBr->getSuccessor(1) != StoreBB)
      return false;
    
    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle.  See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            OtherStore->getAlignment() != SI.getAlignment())
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }
    
    // In order to eliminate the store in OtherBr, we have to
    // make sure nothing reads or overwrites the stored value in
    // StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }
  
  // Insert a PHI node now if we need it.
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge");
    PN->reserveOperandSpace(2);
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }
  
  // Advance to a place where it is safe to insert the new store and
  // insert it.
  BBI = DestBB->getFirstNonPHI();
  InsertNewInstBefore(new StoreInst(MergedVal, SI.getOperand(1),
                                    OtherStore->isVolatile(),
                                    SI.getAlignment()), *BBI);
  
  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  return true;
}
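For illustration (schematic IR), the 'if then else' diamond

  if.then:                          if.else:
    store i32 1, i32* %P              store i32 2, i32* %P
    br label %join                    br label %join

is rewritten so the successor carries a single merged store:

  join:
    %storemerge = phi i32 [ 1, %if.then ], [ 2, %if.else ]
    store i32 %storemerge, i32* %P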
Example #6
static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR,
        const SCCNodeSet &SCCNodes) {
    FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F);
    if (MRB == FMRB_DoesNotAccessMemory)
        // Already perfect!
        return MAK_ReadNone;

    // Definitions with weak linkage may be overridden at linktime with
    // something that writes memory, so treat them like declarations.
    if (F.isDeclaration() || F.mayBeOverridden()) {
        if (AliasAnalysis::onlyReadsMemory(MRB))
            return MAK_ReadOnly;

        // Conservatively assume it writes to memory.
        return MAK_MayWrite;
    }

    // Scan the function body for instructions that may read or write memory.
    bool ReadsMemory = false;
    for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
        Instruction *I = &*II;

        // Some instructions can be ignored even if they read or write memory.
        // Detect these now, skipping to the next instruction if one is found.
        CallSite CS(cast<Value>(I));
        if (CS) {
            // Ignore calls to functions in the same SCC.
            if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
                continue;
            FunctionModRefBehavior MRB = AAR.getModRefBehavior(CS);

            // If the call doesn't access memory, we're done.
            if (!(MRB & MRI_ModRef))
                continue;

            if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) {
                // The call could access any memory. If that includes writes, give up.
                if (MRB & MRI_Mod)
                    return MAK_MayWrite;
                // If it reads, note it.
                if (MRB & MRI_Ref)
                    ReadsMemory = true;
                continue;
            }

            // Check whether all pointer arguments point to local memory, and
            // ignore calls that only access local memory.
            for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
                    CI != CE; ++CI) {
                Value *Arg = *CI;
                if (!Arg->getType()->isPtrOrPtrVectorTy())
                    continue;

                AAMDNodes AAInfo;
                I->getAAMetadata(AAInfo);
                MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo);

                // Skip accesses to local or constant memory as they don't impact the
                // externally visible mod/ref behavior.
                if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
                    continue;

                if (MRB & MRI_Mod)
                    // Writes non-local memory.  Give up.
                    return MAK_MayWrite;
                if (MRB & MRI_Ref)
                    // Ok, it reads non-local memory.
                    ReadsMemory = true;
            }
            continue;
        } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
            // Ignore non-volatile loads from local memory. (Atomic is okay here.)
            if (!LI->isVolatile()) {
                MemoryLocation Loc = MemoryLocation::get(LI);
                if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
                    continue;
            }
        } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
            // Ignore non-volatile stores to local memory. (Atomic is okay here.)
            if (!SI->isVolatile()) {
                MemoryLocation Loc = MemoryLocation::get(SI);
                if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
                    continue;
            }
        } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
            // Ignore vaargs on local memory.
            MemoryLocation Loc = MemoryLocation::get(VI);
            if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
                continue;
        }

        // Any remaining instructions need to be taken seriously!  Check if they
        // read or write memory.
        if (I->mayWriteToMemory())
            // Writes memory.  Just give up.
            return MAK_MayWrite;

        // If this instruction may read memory, remember that.
        ReadsMemory |= I->mayReadFromMemory();
    }

    return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone;
}
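As a rough guide to the three results (hypothetical C++ inputs, assuming the
functions are defined with strong linkage):

int  f(int x)  { return x + 1; }        // MAK_ReadNone: touches no memory
int  g(int *p) { return *p; }           // MAK_ReadOnly: reads non-local memory
void h(int *p) { *p = 0; }              // MAK_MayWrite: writes non-local memory
int  k()       { int a = 1; return a; } // MAK_ReadNone: only local accesses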
Example #7
/// Takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
                                     Value *cpyDest, Value *cpySrc,
                                     uint64_t cpyLen, unsigned cpyAlign,
                                     CallInst *C) {
  // The general transformation to keep in mind is
  //
  //   call @func(..., src, ...)
  //   memcpy(dest, src, ...)
  //
  // ->
  //
  //   memcpy(dest, src, ...)
  //   call @func(..., dest, ...)
  //
  // Since moving the memcpy is technically awkward, we additionally check that
  // src only holds uninitialized values at the moment of the call, meaning that
  // the memcpy can be discarded rather than moved.
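  //
  // For instance (schematic): when %tmp is an otherwise-unused alloca,
  //
  //   call void @func(i8* sret %tmp)
  //   call void @llvm.memcpy(i8* %dest, i8* %tmp, i64 %n, ...)
  //
  // becomes
  //
  //   call void @func(i8* sret %dest)
  //
  // and the memcpy is simply dropped.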

  // Deliberately get the source and destination with bitcasts stripped away,
  // because we'll need to do type comparisons based on the underlying type.
  CallSite CS(C);

  // Require that src be an alloca.  This simplifies the reasoning considerably.
  AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
  if (!srcAlloca)
    return false;

  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
  if (!srcArraySize)
    return false;

  const DataLayout &DL = cpy->getModule()->getDataLayout();
  uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
                     srcArraySize->getZExtValue();

  if (cpyLen < srcSize)
    return false;

  // Check that accessing the first srcSize bytes of dest will not cause a
  // trap.  Otherwise the transform is invalid since it might cause a trap
  // to occur earlier than it otherwise would.
  if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
    // The destination is an alloca.  Check it is larger than srcSize.
    ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
    if (!destArraySize)
      return false;

    uint64_t destSize = DL.getTypeAllocSize(A->getAllocatedType()) *
                        destArraySize->getZExtValue();

    if (destSize < srcSize)
      return false;
  } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
    if (A->getDereferenceableBytes() < srcSize) {
      // If the destination is an sret parameter then only accesses that are
      // outside of the returned struct type can trap.
      if (!A->hasStructRetAttr())
        return false;

      Type *StructTy = cast<PointerType>(A->getType())->getElementType();
      if (!StructTy->isSized()) {
        // The call may never return and hence the copy-instruction may never
        // be executed, and therefore it's not safe to say "the destination
        // has at least <cpyLen> bytes, as implied by the copy-instruction",
        return false;
      }

      uint64_t destSize = DL.getTypeAllocSize(StructTy);
      if (destSize < srcSize)
        return false;
    }
  } else {
    return false;
  }

  // Check that dest points to memory that is at least as aligned as src.
  unsigned srcAlign = srcAlloca->getAlignment();
  if (!srcAlign)
    srcAlign = DL.getABITypeAlignment(srcAlloca->getAllocatedType());
  bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
  // If dest is not aligned enough and we can't increase its alignment then
  // bail out.
  if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest))
    return false;

  // Check that src is not accessed except via the call and the memcpy.  This
  // guarantees that it holds only undefined values when passed in (so the final
  // memcpy can be dropped), that it is not read or written between the call and
  // the memcpy, and that writing beyond the end of it is undefined.
  SmallVector<User*, 8> srcUseList(srcAlloca->user_begin(),
                                   srcAlloca->user_end());
  while (!srcUseList.empty()) {
    User *U = srcUseList.pop_back_val();

    if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
      for (User *UU : U->users())
        srcUseList.push_back(UU);
      continue;
    }
    if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
      if (!G->hasAllZeroIndices())
        return false;

      for (User *UU : U->users())
        srcUseList.push_back(UU);
      continue;
    }
    if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U))
      if (IT->getIntrinsicID() == Intrinsic::lifetime_start ||
          IT->getIntrinsicID() == Intrinsic::lifetime_end)
        continue;

    if (U != C && U != cpy)
      return false;
  }

  // Check that src isn't captured by the called function since the
  // transformation can cause aliasing issues in that case.
  for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
    if (CS.getArgument(i) == cpySrc && !CS.doesNotCapture(i))
      return false;

  // Since we're changing the parameter to the callsite, we need to make sure
  // that what would be the new parameter dominates the callsite.
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
    if (!DT.dominates(cpyDestInst, C))
      return false;

  // In addition to knowing that the call does not access src in some
  // unexpected manner, for example via a global, which we deduce from
  // the use analysis, we also need to know that it does not sneakily
  // access dest.  We rely on AA to figure this out for us.
  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  ModRefInfo MR = AA.getModRefInfo(C, cpyDest, srcSize);
  // If necessary, perform additional analysis.
  if (MR != MRI_NoModRef)
    MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT);
  if (MR != MRI_NoModRef)
    return false;

  // All the checks have passed, so do the transformation.
  bool changedArgument = false;
  for (unsigned i = 0; i < CS.arg_size(); ++i)
    if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
      Value *Dest = cpySrc->getType() == cpyDest->getType() ?  cpyDest
        : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
                                      cpyDest->getName(), C);
      changedArgument = true;
      if (CS.getArgument(i)->getType() == Dest->getType())
        CS.setArgument(i, Dest);
      else
        CS.setArgument(i, CastInst::CreatePointerCast(Dest,
                          CS.getArgument(i)->getType(), Dest->getName(), C));
    }

  if (!changedArgument)
    return false;

  // If the destination wasn't sufficiently aligned then increase its alignment.
  if (!isDestSufficientlyAligned) {
    assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
    cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
  }

  // Drop any cached information about the call, because we may have changed
  // its dependence information by changing its parameter.
  MD->removeInstruction(C);

  // Update AA metadata
  // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
  // handled here, but combineMetadata doesn't support them yet
  unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
                         LLVMContext::MD_noalias,
                         LLVMContext::MD_invariant_group};
  combineMetadata(C, cpy, KnownIDs);

  // Remove the memcpy.
  MD->removeInstruction(cpy);
  ++NumMemCpyInstr;

  return true;
}
Example #8
  /*
   * Reduce this module with respect to the given interface.
   * - The interface describes some of the uses of the functions, so here we
   *   can generate specialized versions of those functions.
   * Generates a ComponentInterfaceTransform that clients can use to rewrite
   * their code to the new API.
   */
  bool
  SpecializeComponent(Module& M, ComponentInterfaceTransform& T,
      SpecializationPolicy &policy, std::list<Function*>& to_add)
  {
    int rewrite_count = 0;
    const ComponentInterface& I = T.getInterface();
    // TODO: What needs to be done?
    // - Should try to handle strings & arrays
    for (ComponentInterface::FunctionIterator ff = I.begin(), fe = I.end();
         ff != fe; ++ff) {
      StringRef name = ff->first();
      Function* func = resolveFunction(M, name);
      if (func == NULL || func->isDeclaration()) {
        // We don't specialize declarations because we don't own them
        continue;
      }

      for (ComponentInterface::CallIterator cc = I.call_begin(name), ce =
          I.call_end(name); cc != ce; ++cc) {
        const CallInfo* const call = *cc;

        const unsigned arg_count = call->args.size();

        if (func->isVarArg()) {
          // TODO: I don't know how to specialize variable argument functions yet
          continue;
        }
        if (arg_count != func->getArgumentList().size()) {
          // Not referring to this function?
          // NOTE: I can't assert this equality because of the way that approximations occur
          continue;
        }

        SmallBitVector slice(arg_count);
        bool shouldSpecialize = policy.specializeOn(func, call->args.begin(),
            call->args.end(), slice);

        if (!shouldSpecialize)
          continue;

        std::vector<Value*> args;
        std::vector<unsigned> argPerm;
        args.reserve(arg_count);
        argPerm.reserve(slice.count());
        for (unsigned i = 0; i < arg_count; i++) {
          if (slice.test(i)) {
            Type *paramType = func->getFunctionType()->getParamType(i);
            Value *concreteArg = call->args[i].concretize(M, paramType);
            args.push_back(concreteArg);
            assert(concreteArg->getType() == paramType &&
                   "Specializing function with concrete argument of wrong type!");
          } else {
            args.push_back(NULL);
            argPerm.push_back(i);
          }
        }

        Function* nfunc = specializeFunction(func, args);
        nfunc->setLinkage(GlobalValue::ExternalLinkage);

        FunctionHandle rewriteTo = nfunc->getName();

        T.rewrite(name, call, rewriteTo, argPerm);

        to_add.push_back(nfunc);
        rewrite_count++;
      }
    }
    if (rewrite_count > 0) {
      errs() << rewrite_count << " pending rewrites\n";
    }
    return rewrite_count > 0;
  }
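  // Schematic effect (hypothetical names): if the interface records a call
  // foo(%x, 42) and the policy elects to specialize on the constant, the
  // rewrite maps
  //   call i32 @foo(i32 %x, i32 42)
  // onto
  //   call i32 @foo_spec(i32 %x)
  // where @foo_spec has 42 folded into its body and argPerm == {0}.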
Example #9
// insertFastDiv - Substitutes the div/rem instruction with code that checks the
// value of the operands and uses a shorter-faster div/rem instruction when
// possible and the longer-slower div/rem instruction otherwise.
static bool insertFastDiv(Function &F,
                          Function::iterator &I,
                          BasicBlock::iterator &J,
                          IntegerType *BypassType,
                          bool UseDivOp,
                          bool UseSignedOp,
                          DivCacheTy &PerBBDivCache) {
  // Get instruction operands
  Instruction *Instr = J;
  Value *Dividend = Instr->getOperand(0);
  Value *Divisor = Instr->getOperand(1);

  if (isa<ConstantInt>(Divisor)) {
    // Operations with immediate values should have
    // been solved and replaced during compile time.
    return false;
  }

  // Basic Block is split before divide
  BasicBlock *MainBB = I;
  BasicBlock *SuccessorBB = I->splitBasicBlock(J);
  ++I; // Advance iterator I to SuccessorBB

  // Add new basic block for slow divide operation
  BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
                                          MainBB->getParent(), SuccessorBB);
  SlowBB->moveBefore(SuccessorBB);
  IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
  Value *SlowQuotientV;
  Value *SlowRemainderV;
  if (UseSignedOp) {
    SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
    SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
  } else {
    SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
    SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
  }
  SlowBuilder.CreateBr(SuccessorBB);

  // Add new basic block for fast divide operation
  BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
                                          MainBB->getParent(), SuccessorBB);
  FastBB->moveBefore(SlowBB);
  IRBuilder<> FastBuilder(FastBB, FastBB->begin());
  Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor,
                                                BypassType);
  Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend,
                                                 BypassType);

  // udiv/urem because the optimization only handles positive numbers. The
  // divide must not be marked 'exact': the remainder is nonzero in general.
  Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV,
                                                 ShortDivisorV);
  Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
                                                  ShortDivisorV);
  Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
                                                ShortQuotientV,
                                                Dividend->getType());
  Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
                                                 ShortRemainderV,
                                                 Dividend->getType());
  FastBuilder.CreateBr(SuccessorBB);

  // Phi nodes for result of div and rem
  IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
  PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
  QuoPhi->addIncoming(SlowQuotientV, SlowBB);
  QuoPhi->addIncoming(FastQuotientV, FastBB);
  PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
  RemPhi->addIncoming(SlowRemainderV, SlowBB);
  RemPhi->addIncoming(FastRemainderV, FastBB);

  // Replace Instr with appropriate phi node
  if (UseDivOp)
    Instr->replaceAllUsesWith(QuoPhi);
  else
    Instr->replaceAllUsesWith(RemPhi);
  Instr->eraseFromParent();

  // Combine operands into a single value with OR for value testing below
  MainBB->getInstList().back().eraseFromParent();
  IRBuilder<> MainBuilder(MainBB, MainBB->end());
  Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);

  // BitMask is inverted to check if the operands are
  // larger than the bypass type
  uint64_t BitMask = ~BypassType->getBitMask();
  Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);

  // Compare operand values and branch
  Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
  Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
  MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);

  // Point iterator J at the first instruction of SuccessorBB
  J = I->begin();

  // Cache phi nodes to be used later in place of other instances
  // of div or rem with the same sign, dividend, and divisor
  DivOpInfo Key(UseSignedOp, Dividend, Divisor);
  DivPhiNodes Value(QuoPhi, RemPhi);
  PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
  return true;
}
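// Schematic of the emitted control flow for an i64 udiv with an i32 bypass
// type (constants illustrative):
//   MainBB:      %or  = or i64 %Dividend, %Divisor
//                %and = and i64 %or, 0xFFFFFFFF00000000  ; ~getBitMask()
//                %cmp = icmp eq i64 %and, 0
//                br i1 %cmp, label %FastBB, label %SlowBB
//   FastBB:      trunc to i32, udiv + urem, zext back to i64
//   SlowBB:      full-width sdiv/srem or udiv/urem
//   SuccessorBB: phi nodes select the fast or slow quotient and remainder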
Example #10
void GNUstep::IMPCacher::CacheLookup(Instruction *lookup, Value *slot, Value
                                     *version, bool isSuperMessage) {

    // If this IMP is already cached, don't cache it again.
    if (lookup->getMetadata(IMPCacheFlagKind)) {
        return;
    }

    lookup->setMetadata(IMPCacheFlagKind, AlreadyCachedFlag);
    bool isInvoke = false;

    BasicBlock *beforeLookupBB = lookup->getParent();
    BasicBlock *lookupBB = SplitBlock(beforeLookupBB, lookup, Owner);
    BasicBlock *lookupFinishedBB = lookupBB;
    BasicBlock *afterLookupBB;

    if (InvokeInst *inv = dyn_cast<InvokeInst>(lookup)) {
        afterLookupBB = inv->getNormalDest();
        lookupFinishedBB =
            BasicBlock::Create(Context, "done_lookup", lookupBB->getParent());
        CGBuilder B(lookupFinishedBB);
        B.CreateBr(afterLookupBB);
        inv->setNormalDest(lookupFinishedBB);
        isInvoke = true;
    } else {
        BasicBlock::iterator iter = lookup;
        iter++;
        afterLookupBB = SplitBlock(iter->getParent(), iter, Owner);
    }

    removeTerminator(beforeLookupBB);

    CGBuilder B = CGBuilder(beforeLookupBB);
    // Load the slot and check that neither it nor the version is 0.
    Value *versionValue = B.CreateLoad(version);
    Value *receiverPtr = lookup->getOperand(0);
    Value *receiver = receiverPtr;
    if (!isSuperMessage) {
        receiver = B.CreateLoad(receiverPtr);
    }
    // For small objects, we skip the cache entirely.
    // FIXME: Class messages are never to small objects...
    bool is64Bit = llvm::Module::Pointer64 ==
                   B.GetInsertBlock()->getParent()->getParent()->getPointerSize();
    LLVMType *intPtrTy = is64Bit ? Type::getInt64Ty(Context) :
                         Type::getInt32Ty(Context);

    // Receiver as an integer
    Value *receiverSmallObject = B.CreatePtrToInt(receiver, intPtrTy);
    // Receiver is a small object...
    receiverSmallObject =
        B.CreateAnd(receiverSmallObject, is64Bit ? 7 : 1);
    // Receiver is not a small object.
    receiverSmallObject =
        B.CreateICmpNE(receiverSmallObject, Constant::getNullValue(intPtrTy));
    // Ideally, we'd call objc_msgSend() here, but for now we just skip the
    // cache lookup.

    Value *isCacheEmpty =
        B.CreateICmpEQ(versionValue, Constant::getNullValue(IntTy));
    Value *receiverNil =
        B.CreateICmpEQ(receiver, Constant::getNullValue(receiver->getType()));

    isCacheEmpty = B.CreateOr(isCacheEmpty, receiverNil);
    isCacheEmpty = B.CreateOr(isCacheEmpty, receiverSmallObject);

    BasicBlock *cacheLookupBB = BasicBlock::Create(Context, "cache_check",
                                lookupBB->getParent());

    B.CreateCondBr(isCacheEmpty, lookupBB, cacheLookupBB);

    // Check the cache node is current
    B.SetInsertPoint(cacheLookupBB);
    Value *slotValue = B.CreateLoad(slot, "slot_value");
    Value *slotVersion = B.CreateStructGEP(slotValue, 3);
    // Note: Volatile load because the slot version might have changed in
    // another thread.
    slotVersion = B.CreateLoad(slotVersion, true, "slot_version");
    Value *slotCachedFor = B.CreateStructGEP(slotValue, 1);
    slotCachedFor = B.CreateLoad(slotCachedFor, true, "slot_owner");
    Value *cls = B.CreateLoad(B.CreateBitCast(receiver, IdTy));
    Value *isVersionCorrect = B.CreateICmpEQ(slotVersion, versionValue);
    Value *isOwnerCorrect = B.CreateICmpEQ(slotCachedFor, cls);
    Value *isSlotValid = B.CreateAnd(isVersionCorrect, isOwnerCorrect);
    // If this slot is still valid, skip the lookup.
    B.CreateCondBr(isSlotValid, afterLookupBB, lookupBB);

    // Perform the real lookup and cache the result
    removeTerminator(lookupFinishedBB);
    // Replace the looked up slot with the loaded one
    B.SetInsertPoint(afterLookupBB, afterLookupBB->begin());
    PHINode *newLookup = IRBuilderCreatePHI(&B, lookup->getType(), 3, "new_lookup");
    // Not volatile, so a redundant load elimination pass can do some phi
    // magic with this later.
    lookup->replaceAllUsesWith(newLookup);

    B.SetInsertPoint(lookupFinishedBB);
    Value * newReceiver = receiver;
    if (!isSuperMessage) {
        newReceiver = B.CreateLoad(receiverPtr);
    }
    BasicBlock *storeCacheBB = BasicBlock::Create(Context, "cache_store",
                               lookupBB->getParent());

    // Don't store the cached lookup if we are doing forwarding tricks.
    // Also skip caching small object messages for now
    Value *skipCacheWrite =
        B.CreateOr(B.CreateICmpNE(receiver, newReceiver), receiverSmallObject);
    skipCacheWrite = B.CreateOr(skipCacheWrite, receiverNil);
    B.CreateCondBr(skipCacheWrite, afterLookupBB, storeCacheBB);
    B.SetInsertPoint(storeCacheBB);

    // Store it even if the version is 0, because we always check that the
    // version is not 0 at the start and an occasional redundant store is
    // probably better than a branch every time.
    B.CreateStore(lookup, slot);
    B.CreateStore(B.CreateLoad(B.CreateStructGEP(lookup, 3)), version);
    cls = B.CreateLoad(B.CreateBitCast(receiver, IdTy));
    B.CreateStore(cls, B.CreateStructGEP(lookup, 1));
    B.CreateBr(afterLookupBB);

    newLookup->addIncoming(lookup, lookupFinishedBB);
    newLookup->addIncoming(slotValue, cacheLookupBB);
    newLookup->addIncoming(lookup, storeCacheBB);
}
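// Schematic of the control flow built above:
//   beforeLookupBB: cache empty, receiver nil, or small object?
//                     yes -> lookupBB, no -> cacheLookupBB
//   cacheLookupBB:  slot version and owner still valid?
//                     yes -> afterLookupBB, no -> lookupBB
//   lookupBB ... lookupFinishedBB: real lookup, then storeCacheBB unless the
//                     receiver changed (forwarding) or is a small object
//   afterLookupBB:  the %new_lookup phi merges the cached, fresh, and
//                     freshly-stored slots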
Example #11
//
// Returns of float, double and complex need to be handled with a helper
// function.
//
static bool fixupFPReturnAndCall(Function &F, Module *M,
                                 const MipsTargetMachine &TM) {
  bool Modified = false;
  LLVMContext &C = M->getContext();
  Type *MyVoid = Type::getVoidTy(C);
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
    for (BasicBlock::iterator I = BB->begin(), E = BB->end();
         I != E; ++I) {
      Instruction &Inst = *I;
      if (const ReturnInst *RI = dyn_cast<ReturnInst>(I)) {
        Value *RVal = RI->getReturnValue();
        if (!RVal) continue;
        //
        // If there is a return value and it needs a helper function,
        // figure out which one and add a call before the actual
        // return to this helper. The purpose of the helper is to move
        // floating point values from their soft float return mapping to
        // where they would have been mapped to in floating point registers.
        //
        Type *T = RVal->getType();
        FPReturnVariant RV = whichFPReturnVariant(T);
        if (RV == NoFPRet) continue;
        static const char *const Helper[NoFPRet] = {
          "__mips16_ret_sf", "__mips16_ret_df", "__mips16_ret_sc",
          "__mips16_ret_dc"
        };
        const char *Name = Helper[RV];
        AttributeSet A;
        Value *Params[] = {RVal};
        Modified = true;
        //
        // These helper functions have a different calling ABI so
        // this __Mips16RetHelper indicates that so that later
        // during call setup, the proper call lowering to the helper
        // functions will take place.
        //
        A = A.addAttribute(C, AttributeSet::FunctionIndex,
                           "__Mips16RetHelper");
        A = A.addAttribute(C, AttributeSet::FunctionIndex,
                           Attribute::ReadNone);
        A = A.addAttribute(C, AttributeSet::FunctionIndex,
                           Attribute::NoInline);
        Value *HelperFn =
            M->getOrInsertFunction(Name, A, MyVoid, T, nullptr);
        CallInst::Create(HelperFn, Params, "", &Inst);
      } else if (const CallInst *CI = dyn_cast<CallInst>(I)) {
        const Value* V = CI->getCalledValue();
        Type* T = nullptr;
        if (V) T = V->getType();
        PointerType *PFT = nullptr;
        if (T) PFT = dyn_cast<PointerType>(T);
        FunctionType *FT = nullptr;
        if (PFT) FT = dyn_cast<FunctionType>(PFT->getElementType());
        Function *F_ =  CI->getCalledFunction();
        if (FT && needsFPReturnHelper(*FT) &&
            !(F_ && isIntrinsicInline(F_))) {
          Modified=true;
          F.addFnAttr("saveS2");
        }
        if (F_ && !isIntrinsicInline(F_)) {
          // pic mode calls are handled by already defined
          // helper functions
          if (needsFPReturnHelper(*F_)) {
            Modified=true;
            F.addFnAttr("saveS2");
          }
          if (TM.getRelocationModel() != Reloc::PIC_ ) {
            if (needsFPHelperFromSig(*F_)) {
              assureFPCallStub(*F_, M, TM);
              Modified=true;
            }
          }
        }
      }
    }
  return Modified;
}
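// Illustrative effect on a float return (schematic IR; the helper moves the
// soft-float return value into its floating-point register mapping):
//   before:  ret float %v
//   after:   call void @__mips16_ret_sf(float %v)
//            ret float %v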
Example #12
void GNUstep::IMPCacher::SpeculativelyInline(Instruction *call, Function
        *function) {
    BasicBlock *beforeCallBB = call->getParent();
    BasicBlock *callBB = SplitBlock(beforeCallBB, call, Owner);
    BasicBlock *inlineBB = BasicBlock::Create(Context, "inline",
                           callBB->getParent());


    BasicBlock::iterator iter = call;
    iter++;

    BasicBlock *afterCallBB = SplitBlock(iter->getParent(), iter, Owner);

    removeTerminator(beforeCallBB);

    // Put a branch before the call, testing whether the callee really is the
    // function
    IRBuilder<> B = IRBuilder<>(beforeCallBB);
    Value *callee = isa<CallInst>(call) ? cast<CallInst>(call)->getCalledValue()
                    : cast<InvokeInst>(call)->getCalledValue();

    const FunctionType *FTy = function->getFunctionType();
    const FunctionType *calleeTy = cast<FunctionType>(
                                       cast<PointerType>(callee->getType())->getElementType());
    if (calleeTy != FTy) {
        callee = B.CreateBitCast(callee, function->getType());
    }

    Value *isInlineValid = B.CreateICmpEQ(callee, function);
    B.CreateCondBr(isInlineValid, inlineBB, callBB);
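    // At this point the guard is in place; schematically:
    //   if (%callee == @function) goto inlineBB; else goto callBB;
    // The two call results are merged by the phi built in afterCallBB below.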

    // In the inline BB, add a copy of the call, but this time calling the real
    // version.
    Instruction *inlineCall = call->clone();
    Value *inlineResult = inlineCall;
    inlineBB->getInstList().push_back(inlineCall);

    B.SetInsertPoint(inlineBB);

    if (calleeTy != FTy) {
        for (unsigned i=0 ; i<FTy->getNumParams() ; i++) {
            LLVMType *callType = calleeTy->getParamType(i);
            LLVMType *argType = FTy->getParamType(i);
            if (callType != argType) {
                inlineCall->setOperand(i, new
                                       BitCastInst(inlineCall->getOperand(i), argType, "", inlineCall));
            }
        }
        if (FTy->getReturnType() != calleeTy->getReturnType()) {
            if (FTy->getReturnType() == Type::getVoidTy(Context)) {
                inlineResult = Constant::getNullValue(calleeTy->getReturnType());
            } else {
                inlineResult =
                    new BitCastInst(inlineCall, calleeTy->getReturnType(), "", inlineBB);
            }
        }
    }

    B.CreateBr(afterCallBB);

    // Unify the return values
    if (call->getType() != Type::getVoidTy(Context)) {
        PHINode *phi = CreatePHI(call->getType(), 2, "", afterCallBB->begin());
        call->replaceAllUsesWith(phi);
        phi->addIncoming(call, callBB);
        phi->addIncoming(inlineResult, inlineBB);
    }

    // Really do the real inlining
    InlineFunctionInfo IFI(0, 0);
    if (CallInst *c = dyn_cast<CallInst>(inlineCall)) {
        c->setCalledFunction(function);
        InlineFunction(c, IFI);
    } else if (InvokeInst *c = dyn_cast<InvokeInst>(inlineCall)) {
        c->setCalledFunction(function);
        InlineFunction(c, IFI);
    }
}
Example #13
Value *PropagateJuliaAddrspaces::LiftPointer(Value *V, Type *LocTy, Instruction *InsertPt) {
    SmallVector<Value *, 4> Stack;
    Value *CurrentV = V;
    // Follow pointer casts back, see if we're based on a pointer in
    // an untracked address space, in which case we're allowed to drop
    // intermediate addrspace casts.
    while (true) {
        Stack.push_back(CurrentV);
        if (isa<BitCastInst>(CurrentV))
            CurrentV = cast<BitCastInst>(CurrentV)->getOperand(0);
        else if (isa<AddrSpaceCastInst>(CurrentV)) {
            CurrentV = cast<AddrSpaceCastInst>(CurrentV)->getOperand(0);
            if (!isSpecialAS(getValueAddrSpace(CurrentV)))
                break;
        }
        else if (isa<GetElementPtrInst>(CurrentV)) {
            if (LiftingMap.count(CurrentV)) {
                CurrentV = LiftingMap[CurrentV];
                break;
            } else if (Visited.count(CurrentV)) {
                return nullptr;
            }
            Visited.insert(CurrentV);
            CurrentV = cast<GetElementPtrInst>(CurrentV)->getOperand(0);
        } else
            break;
    }
    if (!CurrentV->getType()->isPointerTy())
        return nullptr;
    if (isSpecialAS(getValueAddrSpace(CurrentV)))
        return nullptr;
    // Ok, we're allowed to change the address space of this load, go back and
    // reconstitute any GEPs in the new address space.
    for (Value *V : llvm::reverse(Stack)) {
        GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V);
        if (!GEP)
            continue;
        if (LiftingMap.count(GEP)) {
            CurrentV = LiftingMap[GEP];
            continue;
        }
        GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(GEP->clone());
        ToInsert.push_back(std::make_pair(NewGEP, GEP));
        Type *GEPTy = GEP->getSourceElementType();
        Type *NewRetTy = cast<PointerType>(GEP->getType())->getElementType()->getPointerTo(getValueAddrSpace(CurrentV));
        NewGEP->mutateType(NewRetTy);
        if (cast<PointerType>(CurrentV->getType())->getElementType() != GEPTy) {
            auto *BCI = new BitCastInst(CurrentV, GEPTy->getPointerTo());
            ToInsert.push_back(std::make_pair(BCI, NewGEP));
            CurrentV = BCI;
        }
        NewGEP->setOperand(GetElementPtrInst::getPointerOperandIndex(), CurrentV);
        LiftingMap[GEP] = NewGEP;
        CurrentV = NewGEP;
    }
    if (LocTy && cast<PointerType>(CurrentV->getType())->getElementType() != LocTy) {
        auto *BCI = new BitCastInst(CurrentV, LocTy->getPointerTo());
        ToInsert.push_back(std::make_pair(BCI, InsertPt));
        CurrentV = BCI;
    }
    return CurrentV;
}
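// Schematic (assuming addrspace(11) stands in for one of the special address
// spaces here): a chain such as
//   %p = addrspacecast i8* %base to i8 addrspace(11)*
//   %g = getelementptr i8, i8 addrspace(11)* %p, i64 8
// is rebuilt as
//   %g.lifted = getelementptr i8, i8* %base, i64 8
// so downstream users can address the untracked space directly.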
Example #14
/// \brief Check if executing a load of this pointer value cannot trap.
///
/// If it is not obviously safe to load from the specified pointer, we do
/// a quick local scan of the basic block containing \c ScanFrom, to determine
/// if the address is already accessed.
///
/// This uses the pointee type to determine how many bytes need to be safe to
/// load from the pointer.
bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
                                       unsigned Align) {
  const DataLayout &DL = ScanFrom->getModule()->getDataLayout();

  // Zero alignment means that the load has the ABI alignment for the target
  if (Align == 0)
    Align = DL.getABITypeAlignment(V->getType()->getPointerElementType());
  assert(isPowerOf2_32(Align));

  int64_t ByteOffset = 0;
  Value *Base = GetPointerBaseWithConstantOffset(V, ByteOffset, DL);

  if (ByteOffset < 0) // out of bounds
    return false;

  Type *BaseType = nullptr;
  unsigned BaseAlign = 0;
  if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
    // An alloca is safe to load from as long as it is suitably aligned.
    BaseType = AI->getAllocatedType();
    BaseAlign = AI->getAlignment();
  } else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
    // Global variables are not necessarily safe to load from if they are
    // overridden. Their size may change or they may be weak and require a test
    // to determine if they were in fact provided.
    if (!GV->mayBeOverridden()) {
      BaseType = GV->getType()->getElementType();
      BaseAlign = GV->getAlignment();
    }
  }

  PointerType *AddrTy = cast<PointerType>(V->getType());
  uint64_t LoadSize = DL.getTypeStoreSize(AddrTy->getElementType());

  // If we found a base allocated type from either an alloca or global variable,
  // try to see if we are definitively within the allocated region. We need to
  // know the size of the base type and the loaded type to do anything in this
  // case.
  if (BaseType && BaseType->isSized()) {
    if (BaseAlign == 0)
      BaseAlign = DL.getPrefTypeAlignment(BaseType);

    if (Align <= BaseAlign) {
      // Check if the load is within the bounds of the underlying object.
      if (ByteOffset + LoadSize <= DL.getTypeAllocSize(BaseType) &&
          ((ByteOffset % Align) == 0))
        return true;
    }
  }

  // Otherwise, be a little bit aggressive by scanning the local block where we
  // want to check to see if the pointer is already being loaded or stored
  // from/to.  If so, the previous load or store would have already trapped,
  // so there is no harm doing an extra load (also, CSE will later eliminate
  // the load entirely).
  BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();

  // We can at least always strip pointer casts even though we can't use the
  // base here.
  V = V->stripPointerCasts();

  while (BBI != E) {
    --BBI;

    // If we see a free or a call which may write to memory (i.e. which might
    // do a free), the pointer could be marked invalid.
    if (isa<CallInst>(BBI) && BBI->mayWriteToMemory() &&
        !isa<DbgInfoIntrinsic>(BBI))
      return false;

    Value *AccessedPtr;
    unsigned AccessedAlign;
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      AccessedPtr = LI->getPointerOperand();
      AccessedAlign = LI->getAlignment();
    } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
      AccessedPtr = SI->getPointerOperand();
      AccessedAlign = SI->getAlignment();
    } else
      continue;

    Type *AccessedTy = AccessedPtr->getType()->getPointerElementType();
    if (AccessedAlign == 0)
      AccessedAlign = DL.getABITypeAlignment(AccessedTy);
    if (AccessedAlign < Align)
      continue;

    // Handle trivial cases.
    if (AccessedPtr == V)
      return true;

    if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) &&
        LoadSize <= DL.getTypeStoreSize(AccessedTy))
      return true;
  }
  return false;
}
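The in-bounds test above is plain interval arithmetic on the constant offset. A minimal standalone sketch of that check, with the offset and sizes as ordinary integers (the function name and parameters here are illustrative, not LLVM API):

#include <cstdint>

// Safe iff the access starts inside the object, ends inside the object,
// and the constant offset preserves the requested alignment.
static bool isWithinAllocatedRegion(int64_t ByteOffset, uint64_t LoadSize,
                                    uint64_t BaseSize, uint64_t Align) {
  if (ByteOffset < 0)
    return false; // out of bounds before the object even starts
  return uint64_t(ByteOffset) + LoadSize <= BaseSize &&
         uint64_t(ByteOffset) % Align == 0;
}

For example, an 8-byte load at offset 8 of a 16-byte, 8-aligned alloca passes; at offset 12 it fails both the size and the alignment condition.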
Example #15
0
bool DSE::runOnBasicBlock(BasicBlock &BB) {
  bool MadeChange = false;

  // Do a top-down walk on the BB.
  for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
    Instruction *Inst = BBI++;

    // Handle 'free' calls specially.
    if (CallInst *F = isFreeCall(Inst)) {
      MadeChange |= HandleFree(F);
      continue;
    }

    // If we find something that writes memory, get its memory dependence.
    if (!hasMemoryWrite(Inst))
      continue;

    MemDepResult InstDep = MD->getDependency(Inst);

    // Ignore any store where we can't find a local dependence.
    // FIXME: cross-block DSE would be fun. :)
    if (!InstDep.isDef() && !InstDep.isClobber())
      continue;

    // If we're storing the same value back to a pointer that we just
    // loaded from, then the store can be removed.
    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
        if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
            SI->getOperand(0) == DepLoad && isRemovable(SI)) {
          DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n  "
                       << "LOAD: " << *DepLoad << "\n  STORE: " << *SI << '\n');

          // DeleteDeadInstruction can delete the current instruction.  Save BBI
          // in case we need it.
          WeakVH NextInst(BBI);

          DeleteDeadInstruction(SI, *MD);

          if (NextInst == 0)  // Next instruction deleted.
            BBI = BB.begin();
          else if (BBI != BB.begin())  // Revisit this instruction if possible.
            --BBI;
          ++NumFastStores;
          MadeChange = true;
          continue;
        }
      }
    }

    // Figure out what location is being stored to.
    AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA);

    // If we didn't get a useful location, fail.
    if (Loc.Ptr == 0)
      continue;

    while (InstDep.isDef() || InstDep.isClobber()) {
      // Get the memory clobbered by the instruction we depend on.  MemDep will
      // skip any instructions that 'Loc' clearly doesn't interact with.  If we
      // end up depending on a may- or must-aliased load, then we can't optimize
      // away the store and we bail out.  However, if we depend on something
      // that overwrites the memory location we *can* potentially optimize it.
      //
      // Find out what memory location the dependent instruction stores.
      Instruction *DepWrite = InstDep.getInst();
      AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA);
      // If we didn't get a useful location, or if its size is unknown, bail out.
      if (DepLoc.Ptr == 0)
        break;

      // If we find a write that is a) removable (i.e., non-volatile), b) is
      // completely obliterated by the store to 'Loc', and c) which we know that
      // 'Inst' doesn't load from, then we can remove it.
      if (isRemovable(DepWrite) && 
          !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
        int64_t InstWriteOffset, DepWriteOffset; 
        OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA, 
                                         DepWriteOffset, InstWriteOffset); 
        if (OR == OverwriteComplete) {
          DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
                << *DepWrite << "\n  KILLER: " << *Inst << '\n');

          // Delete the store and now-dead instructions that feed it.
          DeleteDeadInstruction(DepWrite, *MD);
          ++NumFastStores;
          MadeChange = true;
          
          // DeleteDeadInstruction can delete the current instruction in loop
          // cases, reset BBI.
          BBI = Inst;
          if (BBI != BB.begin())
            --BBI;
          break;
        } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
          // TODO: base this on the target vector size so that if the earlier
          // store was too small to get vector writes anyway, then it's likely
          // a good idea to shorten it.
          // Power-of-2 vector writes are probably always a bad idea to
          // shorten, as any store/memset/memcpy is likely using vector
          // instructions, so shortening it to a non-vector size is likely to
          // be slower.
          MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite);
          unsigned DepWriteAlign = DepIntrinsic->getAlignment();
          if (llvm::isPowerOf2_64(InstWriteOffset) ||
              ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
            
            DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW END: "
                  << *DepWrite << "\n  KILLER (offset " 
                  << InstWriteOffset << ", " 
                  << DepLoc.Size << ")"
                  << *Inst << '\n');
            
            Value* DepWriteLength = DepIntrinsic->getLength();
            Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
                                                    InstWriteOffset - 
                                                    DepWriteOffset);
            DepIntrinsic->setLength(TrimmedLength);
            MadeChange = true;
          }
        }
      }

      // If this is a may-aliased store that is clobbering the store value, we
      // can keep searching past it for another must-aliased pointer that stores
      // to the same location.  For example, in:
      //   store -> P
      //   store -> Q
      //   store -> P
      // we can remove the first store to P even though we don't know if P and Q
      // alias.
      if (DepWrite == &BB.front()) break;

      // Can't look past this instruction if it might read 'Loc'.
      if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref)
        break;

      InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB);
    }
  }

  // If this block ends in a return, unwind, or unreachable, all allocas are
  // dead at its end, which means stores to them are also dead.
  if (BB.getTerminator()->getNumSuccessors() == 0)
    MadeChange |= handleEndBlock(BB);

  return MadeChange;
}
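The OverwriteEnd branch above trims the earlier write rather than deleting it. A hedged standalone sketch of the interval logic, with offsets and sizes as plain integers (the enum mirrors the names used above; the helper itself is invented for illustration):

#include <cstdint>

enum OverwriteResult { OverwriteComplete, OverwriteEnd, OverwriteUnknown };

// The later store covers [InstOff, InstOff + InstSize); the earlier one
// covers [DepOff, DepOff + DepSize). Complete coverage kills the earlier
// store; coverage of its tail lets it be shortened to InstOff - DepOff bytes.
static OverwriteResult classifyOverwrite(int64_t DepOff, int64_t DepSize,
                                         int64_t InstOff, int64_t InstSize) {
  if (InstOff <= DepOff && InstOff + InstSize >= DepOff + DepSize)
    return OverwriteComplete;
  if (InstOff > DepOff && InstOff < DepOff + DepSize &&
      InstOff + InstSize >= DepOff + DepSize)
    return OverwriteEnd;
  return OverwriteUnknown;
}

For a 32-byte memset at offset 0 later overwritten from offset 16 through 40, this returns OverwriteEnd and the memset's length is trimmed to 16 bytes, matching the TrimmedLength computation above.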
Example #16
0
void FunctionCodegen(TiXmlElement *procedure, LLVMContext &context, IRBuilder<> *builder)
{
	const char *proc_name = procedure->Attribute("Name");
	std::vector<Type*> VarArgs;
	std::vector<std::string> VarNames;

	TiXmlElement *form_pars = procedure->FirstChildElement("FormalParameters");
	for (TiXmlElement *form_par = form_pars->FirstChildElement("FormalParameter");
		form_par; form_par = form_par->NextSiblingElement("FormalParameter"))
	{
		const char *form_par_name = form_par->Attribute("Name");
		const char *form_par_type = form_par->Attribute("Type");

		if ((std::string)form_par_type == "INTEGER")
			VarArgs.push_back(Type::getInt32Ty(getGlobalContext()));
		else if ((std::string)form_par_type == "REAL")
			VarArgs.push_back(Type::getDoubleTy(getGlobalContext()));
		else
		{
			// TODO: Other types <<<<<
		}

		VarNames.push_back(form_par_name); // remember the name of the variable
	}

	FunctionType *FT_temp = FunctionType::get(Type::getVoidTy(getGlobalContext()),VarArgs,false);
	Function *F_temp = Function::Create(FT_temp, 
		Function::ExternalLinkage, proc_name, TheModule);

	BasicBlock *BB_temp = BasicBlock::Create(getGlobalContext(),
		"entry " + (std::string)proc_name + ":", F_temp);
	builder->SetInsertPoint(BB_temp);

	ValueSymbolTable &VST = F_temp->getValueSymbolTable();

	// Internal variables
	TiXmlElement *proc_declarations = procedure->FirstChildElement("Declarations");
	for (TiXmlElement *proc_var = proc_declarations->FirstChildElement("Variable");
		proc_var; proc_var = proc_var->NextSiblingElement("Variable"))
	{
		const char *proc_var_name = proc_var->Attribute("Name");
		const char *proc_var_type = proc_var->Attribute("Type");

		AllocaInst *Alloca;
		
		if ((std::string)proc_var_type == "INTEGER")
		{
			Alloca = builder->CreateAlloca(Type::getInt32Ty(getGlobalContext()),0,
				(std::string)proc_var_name);
			//CurVar = Builder.CreateLoad(Alloca, proc_var_name);
		}
		else if ((std::string)proc_var_type == "REAL")
		{
			Alloca = builder->CreateAlloca(Type::getDoubleTy(getGlobalContext()),0,
				(std::string)proc_var_name);
			//CurVar = Builder.CreateLoad(Alloca, proc_var_name);
		}
		else
		{
			// TODO: Other types <<<<<
		}

	}

	//Body parsing
	TiXmlElement *proc_body = procedure->FirstChildElement("Body");
	for (TiXmlElement *proc_op = proc_body->FirstChildElement("Operator");
		proc_op; proc_op = proc_op->NextSiblingElement("Operator"))
	{
		const char *proc_op_typename = proc_op->Attribute("TypeName");
		
		if ((std::string) proc_op_typename == "Assign")
		{
			OperatorAssignCodegen(proc_op, context, builder, F_temp);
			F_temp->dump();
		}
		else
		{
			// TODO: Other types <<<<<
		}
	}

	//VST.dump();
	if (false)
	{
		// Test area
		AllocaInst *ptest = builder->CreateAlloca(Type::getInt32Ty(getGlobalContext()),0,
			"ptest");
		AllocaInst *jtest =(AllocaInst*) VST.lookup("j");
		Value *ctest = ConstantInt::get(getGlobalContext(),APInt(32,111,false));
		Value *cctest = ConstantInt::get(getGlobalContext(),APInt(32,777,true));
		AllocaInst *rrrr = builder->CreateAlloca(Type::getDoubleTy(getGlobalContext()),0,
			"rrrr");
		Value *cccc = ConstantFP::get(getGlobalContext(), APFloat(123.01));

		builder->CreateStore(cccc, rrrr);

		builder->CreateStore(ctest, ptest);
		builder->CreateStore(ctest, jtest);
		builder->CreateStore(cctest, jtest);

		Value *jrez = VST.lookup("j");
		Value *jrezw = builder->CreateLoad(jrez,"j");

		builder->CreateStore(cctest, jrez);
		//builder->CreateBinOp(Instruction::Add, ctest, cctest, "rezzzz");
		cctest->getType()->isPointerTy();
		jrez->getType()->dump();
		jtest->getType()->dump();
		Value *ffff = builder->CreateAdd(cctest, jrezw, "ffff");
		Value *gggg = builder->CreateAdd(jrezw, ffff, "gggg");
		gggg = builder->CreateAdd(jrezw, cctest, "gggg");
		jrezw = builder->CreateAdd(ffff, gggg, "jrezw");
		VST.lookup("j")->dump();
		jrezw->dump();
		Value *jrezww = builder->CreateLoad(VST.lookup("j"),"j");
		jrezww->dump();
		//rez->dump();


		//IRBuilder<> smallBuild(BB_temp);
		//Value *tmp = smallBuild.CreateBinOp(Instruction::Add,
		//	ctest, cctest, "tmp111");

	}
	F_temp->dump();
}
Example #17
0
bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
                                       BasicBlock *&OldEntry,
                                       bool &TailCallsAreMarkedTail,
                                       SmallVectorImpl<PHINode *> &ArgumentPHIs,
                                       bool CannotTailCallElimCallsMarkedTail) {
  // If we are introducing accumulator recursion to eliminate operations after
  // the call instruction that are both associative and commutative, the initial
  // value for the accumulator is placed in this variable.  If this value is set
  // then we actually perform accumulator recursion elimination instead of
  // simple tail recursion elimination.  If the operation is an LLVM instruction
  // (e.g. "add") then it is recorded in AccumulatorRecursionInstr.  If not, then
  // we are handling the case when the return instruction returns a constant C
  // which is different to the constant returned by other return instructions
  // (which is recorded in AccumulatorRecursionEliminationInitVal).  This is a
  // special case of accumulator recursion, the operation being "return C".
  Value *AccumulatorRecursionEliminationInitVal = nullptr;
  Instruction *AccumulatorRecursionInstr = nullptr;

  // Ok, we found a potential tail call.  We can currently only transform the
  // tail call if all of the instructions between the call and the return are
  // movable to above the call itself, leaving the call next to the return.
  // Check that this is the case now.
  BasicBlock::iterator BBI = CI;
  for (++BBI; &*BBI != Ret; ++BBI) {
    if (CanMoveAboveCall(BBI, CI)) continue;

    // If we can't move the instruction above the call, it might be because it
    // is an associative and commutative operation that could be transformed
    // using accumulator recursion elimination.  Check to see if this is the
    // case, and if so, remember the initial accumulator value for later.
    if ((AccumulatorRecursionEliminationInitVal =
                           CanTransformAccumulatorRecursion(BBI, CI))) {
      // Yes, this is accumulator recursion.  Remember which instruction
      // accumulates.
      AccumulatorRecursionInstr = BBI;
    } else {
      return false;   // Otherwise, we cannot eliminate the tail recursion!
    }
  }

  // We can only transform call/return pairs that either ignore the return value
  // of the call and return void, ignore the value of the call and return a
  // constant, return the value returned by the tail call, or that are being
  // accumulator recursion variable eliminated.
  if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
      !isa<UndefValue>(Ret->getReturnValue()) &&
      AccumulatorRecursionEliminationInitVal == nullptr &&
      !getCommonReturnValue(nullptr, CI)) {
    // One case remains that we are able to handle: the current return
    // instruction returns a constant, and all other return instructions
    // return a different constant.
    if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret))
      return false; // Current return instruction does not return a constant.
    // Check that all other return instructions return a common constant.  If
    // so, record it in AccumulatorRecursionEliminationInitVal.
    AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI);
    if (!AccumulatorRecursionEliminationInitVal)
      return false;
  }

  BasicBlock *BB = Ret->getParent();
  Function *F = BB->getParent();

  emitOptimizationRemark(F->getContext(), "tailcallelim", *F, CI->getDebugLoc(),
                         "transforming tail recursion to loop");

  // OK! We can transform this tail call.  If this is the first one found,
  // create the new entry block, allowing us to branch back to the old entry.
  if (!OldEntry) {
    OldEntry = &F->getEntryBlock();
    BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry);
    NewEntry->takeName(OldEntry);
    OldEntry->setName("tailrecurse");
    BranchInst::Create(OldEntry, NewEntry);

    // If this tail call is marked 'tail' and if there are any allocas in the
    // entry block, move them up to the new entry block.
    TailCallsAreMarkedTail = CI->isTailCall();
    if (TailCallsAreMarkedTail)
      // Move all fixed sized allocas from OldEntry to NewEntry.
      for (BasicBlock::iterator OEBI = OldEntry->begin(), E = OldEntry->end(),
             NEBI = NewEntry->begin(); OEBI != E; )
        if (AllocaInst *AI = dyn_cast<AllocaInst>(OEBI++))
          if (isa<ConstantInt>(AI->getArraySize()))
            AI->moveBefore(NEBI);

    // Now that we have created a new block, which jumps to the entry
    // block, insert a PHI node for each argument of the function.
    // For now, we initialize each PHI to only have the real arguments
    // which are passed in.
    Instruction *InsertPos = OldEntry->begin();
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
         I != E; ++I) {
      PHINode *PN = PHINode::Create(I->getType(), 2,
                                    I->getName() + ".tr", InsertPos);
      I->replaceAllUsesWith(PN); // Everyone uses the PHI node now!
      PN->addIncoming(I, NewEntry);
      ArgumentPHIs.push_back(PN);
    }
  }

  // If this function has self recursive calls in the tail position where some
  // are marked tail and some are not, only transform one flavor or another.  We
  // have to choose whether we move allocas in the entry block to the new entry
  // block or not, so we can't make a good choice for both.  NOTE: We could do
  // slightly better here in the case that the function has no entry block
  // allocas.
  if (TailCallsAreMarkedTail && !CI->isTailCall())
    return false;

  // Ok, now that we know we have a pseudo-entry block WITH all of the
  // required PHI nodes, add entries into the PHI node for the actual
  // parameters passed into the tail-recursive call.
  for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
    ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB);

  // If we are introducing an accumulator variable to eliminate the recursion,
  // do so now.  Note that we _know_ that no subsequent tail recursion
  // eliminations will happen on this function because of the way the
  // accumulator recursion predicate is set up.
  //
  if (AccumulatorRecursionEliminationInitVal) {
    Instruction *AccRecInstr = AccumulatorRecursionInstr;
    // Start by inserting a new PHI node for the accumulator.
    pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry);
    PHINode *AccPN =
      PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(),
                      std::distance(PB, PE) + 1,
                      "accumulator.tr", OldEntry->begin());

    // Loop over all of the predecessors of the tail recursion block.  For the
    // real entry into the function we seed the PHI with the initial value,
    // computed earlier.  For any other existing branches to this block (due to
    // other tail recursions eliminated) the accumulator is not modified.
    // Because we haven't added the branch in the current block to OldEntry yet,
    // it will not show up as a predecessor.
    for (pred_iterator PI = PB; PI != PE; ++PI) {
      BasicBlock *P = *PI;
      if (P == &F->getEntryBlock())
        AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P);
      else
        AccPN->addIncoming(AccPN, P);
    }

    if (AccRecInstr) {
      // Add an incoming argument for the current block, which is computed by
      // our associative and commutative accumulator instruction.
      AccPN->addIncoming(AccRecInstr, BB);

      // Next, rewrite the accumulator recursion instruction so that it does not
      // use the result of the call anymore, instead, use the PHI node we just
      // inserted.
      AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN);
    } else {
      // Add an incoming argument for the current block, which is just the
      // constant returned by the current return instruction.
      AccPN->addIncoming(Ret->getReturnValue(), BB);
    }

    // Finally, rewrite any return instructions in the program to return the PHI
    // node instead of the "initval" that they do currently.  This loop will
    // actually rewrite the return value we are destroying, but that's ok.
    for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
      if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
        RI->setOperand(0, AccPN);
    ++NumAccumAdded;
  }

  // Now that all of the PHI nodes are in place, remove the call and
  // ret instructions, replacing them with an unconditional branch.
  BranchInst *NewBI = BranchInst::Create(OldEntry, Ret);
  NewBI->setDebugLoc(CI->getDebugLoc());

  BB->getInstList().erase(Ret);  // Remove return.
  BB->getInstList().erase(CI);   // Remove call.
  ++NumEliminated;
  return true;
}
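The accumulator machinery above is easiest to see on a concrete function. A hedged source-level sketch (hand-written C++, not the pass's actual output): the multiply after the recursive call is associative and commutative, so it becomes an accumulator seeded with the initial value, and the recursion becomes a loop.

// Before: the multiply follows the call, so this is not a plain tail call.
unsigned fact(unsigned n) {
  if (n == 0) return 1;
  return n * fact(n - 1);
}

// After accumulator recursion elimination (sketch): 'acc' plays the role
// of the accumulator PHI, seeded with the initial value 1.
unsigned fact_loop(unsigned n) {
  unsigned acc = 1;    // AccumulatorRecursionEliminationInitVal
  for (; n != 0; --n)  // the branch back to "tailrecurse"
    acc = acc * n;     // the associative/commutative AccRecInstr
  return acc;          // every return now yields the accumulator
}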
Example #18
0
void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
      ValueToNodeMap &NM) {
  DEBUG(dbgs() << "Visiting GEP: " << *GepI << '\n');
  GepNode *N = new (*Mem) GepNode;
  Value *PtrOp = GepI->getPointerOperand();
  ValueToNodeMap::iterator F = NM.find(PtrOp);
  if (F == NM.end()) {
    N->BaseVal = PtrOp;
    N->Flags |= GepNode::Root;
  } else {
    // If PtrOp was a GEP instruction, it must have already been processed.
    // The ValueToNodeMap entry for it is the last gep node in the generated
    // chain. Link to it here.
    N->Parent = F->second;
  }
  N->PTy = PtrOp->getType();
  N->Idx = *GepI->idx_begin();

  // Collect the list of users of this GEP instruction. Will add it to the
  // last node created for it.
  UseSet Us;
  for (Value::user_iterator UI = GepI->user_begin(), UE = GepI->user_end();
       UI != UE; ++UI) {
    // Check if this gep is used by anything other than other geps that
    // we will process.
    if (isa<GetElementPtrInst>(*UI)) {
      GetElementPtrInst *UserG = cast<GetElementPtrInst>(*UI);
      if (isHandledGepForm(UserG))
        continue;
    }
    Us.insert(&UI.getUse());
  }
  Nodes.push_back(N);
  NodeOrder.insert(N);

  // Skip the first index operand, since we only handle 0. This dereferences
  // the pointer operand.
  GepNode *PN = N;
  Type *PtrTy = cast<PointerType>(PtrOp->getType())->getElementType();
  for (User::op_iterator OI = GepI->idx_begin()+1, OE = GepI->idx_end();
       OI != OE; ++OI) {
    Value *Op = *OI;
    GepNode *Nx = new (*Mem) GepNode;
    Nx->Parent = PN;  // Link Nx to the previous node.
    Nx->Flags |= GepNode::Internal;
    Nx->PTy = PtrTy;
    Nx->Idx = Op;
    Nodes.push_back(Nx);
    NodeOrder.insert(Nx);
    PN = Nx;

    PtrTy = next_type(PtrTy, Op);
  }

  // After last node has been created, update the use information.
  if (!Us.empty()) {
    PN->Flags |= GepNode::Used;
    Uses[PN].insert(Us.begin(), Us.end());
  }

  // Link the last node with the originating GEP instruction. This is to
  // help with linking chained GEP instructions.
  NM.insert(std::make_pair(GepI, PN));
}
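A hedged standalone model of the chain built above: one node for the first index, then one internal node per remaining index, each linked to its parent; users of the original GEP attach to the last node. The Node type and helper below are invented for illustration and are far simpler than the pass's GepNode.

#include <vector>

struct Node {
  Node *Parent = nullptr; // nullptr marks a root node
  int Idx = 0;            // stand-in for one GEP index operand
};

// Split a multi-index "GEP" into a chain of single-index nodes. reserve()
// keeps the vector from reallocating, so the Parent pointers stay valid.
static Node *buildChain(const std::vector<int> &Indices,
                        std::vector<Node> &Storage) {
  Storage.clear();
  Storage.reserve(Indices.size());
  Node *Prev = nullptr;
  for (int Idx : Indices) {
    Storage.push_back(Node{Prev, Idx});
    Prev = &Storage.back();
  }
  return Prev; // the last node in the chain carries the GEP's users
}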
Example #19
0
/// This is called on every byval argument in call sites.
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
  const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();
  // Find out what feeds this byval argument.
  Value *ByValArg = CS.getArgument(ArgNo);
  Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
  uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
  MemDepResult DepInfo = MD->getPointerDependencyFrom(
      MemoryLocation(ByValArg, ByValSize), true, CS.getInstruction(),
      CS.getInstruction()->getParent());
  if (!DepInfo.isClobber())
    return false;

  // If the byval argument isn't fed by a memcpy, ignore it.  If it is fed by
  // a memcpy, see if we can byval from the source of the memcpy instead of the
  // result.
  MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
  if (!MDep || MDep->isVolatile() ||
      ByValArg->stripPointerCasts() != MDep->getDest())
    return false;

  // The length of the memcpy must be larger than or equal to the size of the
  // byval.
  ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
  if (!C1 || C1->getValue().getZExtValue() < ByValSize)
    return false;

  // Get the alignment of the byval.  If the call doesn't specify the alignment,
  // then it is some target specific value that we can't know.
  unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
  if (ByValAlign == 0) return false;

  // If it is greater than the memcpy, then we check to see if we can force the
  // source of the memcpy to the alignment we need.  If we fail, we bail out.
  AssumptionCache &AC =
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
          *CS->getParent()->getParent());
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  if (MDep->getAlignment() < ByValAlign &&
      getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL,
                                 CS.getInstruction(), &AC, &DT) < ByValAlign)
    return false;

  // Verify that the copied-from memory doesn't change in between the memcpy and
  // the byval call.
  //    memcpy(a <- b)
  //    *b = 42;
  //    foo(*a)
  // It would be invalid to transform the second memcpy into foo(*b).
  //
  // NOTE: This is conservative, it will stop on any read from the source loc,
  // not just the defining memcpy.
  MemDepResult SourceDep =
      MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
                                   CS.getInstruction(), MDep->getParent());
  if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
    return false;

  Value *TmpCast = MDep->getSource();
  if (MDep->getSource()->getType() != ByValArg->getType())
    TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
                              "tmpcast", CS.getInstruction());

  DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n"
               << "  " << *MDep << "\n"
               << "  " << *CS.getInstruction() << "\n");

  // Otherwise we're good!  Update the byval argument.
  CS.setArgument(ArgNo, TmpCast);
  ++NumMemCpyInstr;
  return true;
}
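At the source level the rewrite corresponds to feeding the byval slot straight from the memcpy's source. A hedged illustration (the struct and functions are made up; the real transformation happens on IR, guarded by the dependence checks above):

#include <cstring>

struct S { int data[8]; };
static void callee(S) {} // stands in for the byval callee

// Before: a temporary is filled by memcpy and then passed by value.
static void before(const S &src) {
  S tmp;
  std::memcpy(&tmp, &src, sizeof(S));
  callee(tmp);
}

// After (what processByValArgument achieves on IR): provided nothing can
// write 'src' between the copy and the call, the byval argument is fed
// from the memcpy's source and 'tmp' becomes dead.
static void after(const S &src) {
  callee(src);
}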
Example #20
0
//
// Method: buildBounce()
//
// Description:
//  Replaces the given call site with a call to a bounce function.  The
//  bounce function compares the function pointer to one of the given
//  target functions and calls the function directly if the pointer
//  matches.
//
Function*
Devirtualize::buildBounce (CallSite CS, std::vector<const Function*>& Targets) {
  //
  // Update the statistics on the number of bounce functions added to the
  // module.
  //
  ++FuncAdded;
  //
  // Create a bounce function that has a function signature almost identical
  // to the function being called.  The only difference is that it will have
  // an additional pointer argument at the beginning of its argument list that
  // will be the function to call.
  //
  Value* ptr = CS.getCalledValue();
  std::vector<Type *> TP;
  TP.insert (TP.begin(), ptr->getType());
  for (CallSite::arg_iterator i = CS.arg_begin();
       i != CS.arg_end();
       ++i) {
    TP.push_back ((*i)->getType());
  }

  FunctionType* NewTy = FunctionType::get(CS.getType(), TP, false);
  Module * M = CS.getInstruction()->getParent()->getParent()->getParent();
  Function* F = Function::Create (NewTy,
                                  GlobalValue::InternalLinkage,
                                  "devirtbounce",
                                  M);

  //
  // Set the names of the arguments.  Also, record the arguments in a vector
  // for subsequent access.
  //
  F->arg_begin()->setName("funcPtr");
  std::vector<Value*> fargs;
  for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae; ++ai)
    if (ai != F->arg_begin()) {
      fargs.push_back(ai);
      ai->setName("arg");
    }

  //
  // Create an entry basic block for the function.  All it should do is perform
  // some cast instructions and branch to the first comparison basic block.
  //
  BasicBlock* entryBB = BasicBlock::Create (M->getContext(), "entry", F);

  //
  // For each function target, create a basic block that will call that
  // function directly.
  //
  std::map<const Function*, BasicBlock*> targets;
  for (unsigned index = 0; index < Targets.size(); ++index) {
    const Function* FL = Targets[index];

    // Create the basic block for doing the direct call
    BasicBlock* BL = BasicBlock::Create (M->getContext(), FL->getName(), F);
    targets[FL] = BL;
    // Create the direct function call
    Value* directCall = CallInst::Create (const_cast<Function*>(FL),
                                          fargs,
                                          "",
                                          BL);

    // Add the return instruction for the basic block
    if (CS.getType()->isVoidTy())
      ReturnInst::Create (M->getContext(), BL);
    else
      ReturnInst::Create (M->getContext(), directCall, BL);
  }

  //
  // Create a failure basic block.  This basic block should simply be an
  // unreachable instruction.
  //
  BasicBlock * failBB = BasicBlock::Create (M->getContext(),
                                            "fail",
                                            F);
  new UnreachableInst (M->getContext(), failBB);

  //
  // Setup the entry basic block.  For now, just have it call the failure
  // basic block.  We'll change the basic block to which it branches later.
  //
  BranchInst * InsertPt = BranchInst::Create (failBB, entryBB);

  //
  // Create basic blocks which will test the value of the incoming function
  // pointer and branch to the appropriate basic block to call the function.
  //
  Type * VoidPtrType = getVoidPtrType (M->getContext());
  Value * FArg = castTo (F->arg_begin(), VoidPtrType, "", InsertPt);
  BasicBlock * tailBB = failBB;
  for (unsigned index = 0; index < Targets.size(); ++index) {
    //
    // Cast the function target to a void pointer.  This can go in the entry
    // block.
    //
    Value * TargetInt = castTo (const_cast<Function*>(Targets[index]),
                                VoidPtrType,
                                "",
                                InsertPt);

    //
    // Create a new basic block that compares the function pointer to the
    // function target.  If the function pointer matches, we'll branch to the
    // basic block performing the direct call for that function; otherwise,
    // we'll branch to the next function call target.
    //
    BasicBlock* TB = targets[Targets[index]];
    BasicBlock* newB = BasicBlock::Create (M->getContext(),
                                           "test." + Targets[index]->getName(),
                                           F);
    CmpInst * setcc = CmpInst::Create (Instruction::ICmp,
                                       CmpInst::ICMP_EQ,
                                       TargetInt,
                                       FArg,
                                       "sc",
                                       newB);
    BranchInst::Create (TB, tailBB, setcc, newB);

    //
    // Make this newly created basic block the next block that will be reached
    // when the next comparison will need to be done.
    //
    tailBB = newB;
  }

  //
  // Make the entry basic block branch to the first comparison basic block.
  //
  // The entry branch is unconditional, so it has exactly one successor.
  InsertPt->setSuccessor(0, tailBB);
  //
  // Return the newly created bounce function.
  //
  return F;
}
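Written by hand, the bounce function built above amounts to a chain of pointer equality tests (the IR tests the targets in the reverse order they were created, which does not change the result). A sketch with two illustrative targets:

#include <cstdlib>

static int targetA(int x) { return x + 1; } // illustrative targets
static int targetB(int x) { return x * 2; }

// Hand-written equivalent of the generated bounce function: compare the
// incoming function pointer against each known target and call it
// directly on a match.
static int devirtbounce(int (*funcPtr)(int), int arg) {
  if (funcPtr == &targetA)
    return targetA(arg); // now a direct call, eligible for inlining
  if (funcPtr == &targetB)
    return targetB(arg);
  std::abort(); // stands in for the unreachable "fail" basic block
}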
Example #21
0
Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);

  // Attempt to improve the alignment.
  if (TD) {
    unsigned KnownAlign =
      GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
    unsigned LoadAlign = LI.getAlignment();
    unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
      TD->getABITypeAlignment(LI.getType());

    if (KnownAlign > EffectiveLoadAlign)
      LI.setAlignment(KnownAlign);
    else if (LoadAlign == 0)
      LI.setAlignment(EffectiveLoadAlign);
  }

  // load (cast X) --> cast (load X) iff safe.
  if (isa<CastInst>(Op))
    if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
      return Res;

  // None of the following transforms are legal for volatile loads.
  if (LI.isVolatile()) return 0;
  
  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
  BasicBlock::iterator BBI = &LI;
  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
    return ReplaceInstUsesWith(LI, AvailableVal);

  // load(gep null, ...) -> unreachable
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
      // Insert a new store to null instruction before the load to indicate
      // that this code is not reachable.  We do this instead of inserting
      // an unreachable instruction directly because we cannot modify the
      // CFG.
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  } 

  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store to null instruction before the load to indicate that
    // this code is not reachable.  We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
  }

  // Instcombine load (constantexpr_cast global) -> cast (load global)
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
    if (CE->isCast())
      if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
        return Res;
  
  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many others simplifications, and
    // exposes redundancy in the code.
    //
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap!  Something like this is valid as long as
    // the condition is always false: load (select bool %C, int* null, int* %G),
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    //
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) {
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                           SI->getOperand(1)->getName()+".val");
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
                                           SI->getOperand(2)->getName()+".val");
        V1->setAlignment(Align);
        V2->setAlignment(Align);
        return SelectInst::Create(SI->getCondition(), V1, V2);
      }

      // load (select (cond, null, P)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(2));
          return &LI;
        }

      // load (select (cond, P, null)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(1));
          return &LI;
        }
    }
  }
  return 0;
}
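The select rewrite above, shown at the source level (a hedged sketch; references make both locations trivially safe to load, which is exactly the condition isSafeToLoadUnconditionally checks before the transformation):

// load (select C, &a, &b) --> select C, (load a), (load b)
// Legal only because both locations are known safe to load from
// unconditionally; otherwise the rewrite could introduce a trap.
int loadOfSelect(bool c, int &a, int &b) {
  int *p = c ? &a : &b; // the select of addresses
  return *p;            // the load the pass rewrites
}

int selectOfLoads(bool c, int &a, int &b) {
  int va = a;           // both loads are hoisted...
  int vb = b;
  return c ? va : vb;   // ...and the select now picks a value
}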
Example #22
0
Value* TripCountProfiler::getValueAtEntryPoint(Value* source, BasicBlock* loopHeader) {

    LoopInfoEx& li = getAnalysis<LoopInfoEx>();
    LoopNormalizerAnalysis& ln = getAnalysis<LoopNormalizerAnalysis>();

    Loop* loop = li.getLoopFor(loopHeader);

    //Option 1: Loop invariant. Return the value itself
    if (loop->isLoopInvariant(source)) return source;

    //Option 2: Sequence of redefinitions with PHI node in the loop header. Return the incoming value from the entry block
    LoopControllersDepGraph& lcd = getAnalysis<LoopControllersDepGraph>();
    GraphNode* node = lcd.depGraph->findNode(source);
    if (!node) {
        return NULL;
    }

    int SCCID = lcd.depGraph->getSCCID(node);
    Graph sccGraph = lcd.depGraph->generateSubGraph(SCCID);

    for(Graph::iterator it =  sccGraph.begin(); it != sccGraph.end(); it++) {

        Value* V = NULL;

        if (VarNode* VN = dyn_cast<VarNode>(*it)) {
            V = VN->getValue();
        } else	if (OpNode* ON = dyn_cast<OpNode>(*it)) {
            V = ON->getValue();
        }

        if (V) {
            if (PHINode* PHI = dyn_cast<PHINode>(V)) {
                if(PHI->getParent() == loopHeader ) {

                    Value* IncomingFromEntry = PHI->getIncomingValueForBlock(ln.entryBlocks[loopHeader]);
                    return IncomingFromEntry;

                }
            }
        }
    }

    Instruction *InstToCopy = NULL;

    //Option 3: Sequence of loads/stores in the same memory location. Create load in the entry block and return the loaded value
    if (LoadInst* LI = dyn_cast<LoadInst>(source)) {
        InstToCopy = LI;
    }

    //Option 4: Cast Instruction (BitCast, ZExt, SExt, Trunc, etc.): propagate the search (the value is theoretically the same)
    if (CastInst* CI = dyn_cast<CastInst>(source)) {
        return getValueAtEntryPoint(CI->getOperand(0), loopHeader);
    }

    //Option 5: GetElementPTR - Create a similar getElementPtr in the entry block
    if (GetElementPtrInst* GEPI = dyn_cast<GetElementPtrInst>(source)) {

        // Do the copy only if all the operands are loop-invariant
        bool isInvariant = true;

        for(unsigned int i = 0; i < GEPI->getNumOperands(); i++) {
            if (!loop->isLoopInvariant(GEPI->getOperand(i))) {
                isInvariant = false;
                break;
            }
        }

        if (isInvariant) InstToCopy = GEPI;
    }

    //Here we try to copy the instruction into the entry block,
    //adjusting its operands so that every value dominates all of its uses.
    if (InstToCopy) {

        unsigned int prev_size = ln.entryBlocks[loopHeader]->getInstList().size();

        Instruction* NEW_INST = InstToCopy->clone();
        ln.entryBlocks[loopHeader]->getInstList().insert(ln.entryBlocks[loopHeader]->getFirstInsertionPt(), NEW_INST);

        for(unsigned int i = 0; i < InstToCopy->getNumOperands(); i++) {

            Value* op = getValueAtEntryPoint(InstToCopy->getOperand(i), loopHeader);

            if (op) {
                if (op->getType() != InstToCopy->getOperand(i)->getType()) op = NULL;
            }

            if (!op) {

                //Undo changes in the entry block
                while (ln.entryBlocks[loopHeader]->getInstList().size() > prev_size) {
                    ln.entryBlocks[loopHeader]->getFirstInsertionPt()->eraseFromParent();
                }

                return NULL;
            }

            NEW_INST->setOperand(i, op);
        }

        return NEW_INST;
    }

    //Option 9999: unknown. Return NULL
    return NULL;
}
Example #23
0
Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // If the RHS is an alloca with a single use, zapify the store, making the
  // alloca dead.
  // If the RHS is an alloca with two uses, the other one being a
  // llvm.dbg.declare, zapify the store and the declare, making the
  // alloca dead.  We must do this to prevent declares from affecting
  // codegen.
  if (!SI.isVolatile()) {
    if (Ptr->hasOneUse()) {
      if (isa<AllocaInst>(Ptr)) 
        return EraseInstFromFunction(SI);
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
        if (isa<AllocaInst>(GEP->getOperand(0))) {
          if (GEP->getOperand(0)->hasOneUse())
            return EraseInstFromFunction(SI);
          if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) {
            EraseInstFromFunction(*DI);
            return EraseInstFromFunction(SI);
          }
        }
      }
    }
    if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) {
      EraseInstFromFunction(*DI);
      return EraseInstFromFunction(SI);
    }
  }

  // Attempt to improve the alignment.
  if (TD) {
    unsigned KnownAlign =
      GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
    unsigned StoreAlign = SI.getAlignment();
    unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
      TD->getABITypeAlignment(Val->getType());

    if (KnownAlign > EffectiveStoreAlign)
      SI.setAlignment(KnownAlign);
    else if (StoreAlign == 0)
      SI.setAlignment(EffectiveStoreAlign);
  }

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations. This
  // situation often occurs with bitfield accesses.
  BasicBlock::iterator BBI = &SI;
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen,
    // and we skip pointer-to-pointer bitcasts, which are NOPs.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }    
    
    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (!PrevSI->isVolatile() &&
          equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1))) {
        ++NumDeadStore;
        ++BBI;
        EraseInstFromFunction(*PrevSI);
        continue;
      }
      break;
    }
    
    // If this is a load, we have to stop.  However, if the loaded value is from
    // the pointer we're loading and is producing the pointer we're storing,
    // then *this* store is dead (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
          !SI.isVolatile())
        return EraseInstFromFunction(SI);
      
      // Otherwise, this is a load from some other location.  Stores before it
      // may not be dead.
      break;
    }
    
    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
      break;
  }

  if (SI.isVolatile()) return 0;  // Don't hack volatile stores.

  // store X, null    -> turns into 'unreachable' in SimplifyCFG
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U);  // Dropped a use.
    }
    return 0;  // Do not modify these!
  }

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // If the pointer destination is a cast, see if we can fold the cast into the
  // source instead.
  if (isa<CastInst>(Ptr))
    if (Instruction *Res = InstCombineStoreToCast(*this, SI))
      return Res;
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
    if (CE->isCast())
      if (Instruction *Res = InstCombineStoreToCast(*this, SI))
        return Res;

  
  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = &SI; 
  do {
    ++BBI;
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return 0;  // xform done!
  
  return 0;
}
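A tiny standalone model of the "store X, P after X = load P" rule used in the backwards scan above (the Op type is invented; the real scan also bounds itself to a handful of instructions and skips debug intrinsics and pointer bitcasts):

#include <vector>

enum Kind { Load, Store, Other };
struct Op { Kind K; int Ptr; int Val; }; // Val: loaded-into / stored-from id

// Mirrors the rule: a store is dead when, scanning backwards past
// instructions that touch no memory, we hit a load of the same pointer
// producing exactly the value being stored.
static bool storeIsDead(const std::vector<Op> &BB, size_t StoreIdx) {
  const Op &St = BB[StoreIdx];
  for (size_t I = StoreIdx; I-- > 0;) {
    const Op &Prev = BB[I];
    if (Prev.K == Load)
      return Prev.Ptr == St.Ptr && Prev.Val == St.Val;
    if (Prev.K == Store)
      return false; // an intervening store; a different rule applies there
    // Kind::Other is assumed not to read or write memory in this model.
  }
  return false;
}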
Example #24
0
void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
  IRBuilder<> Builder(CI->getParent(), CI);
  LLVMContext &Context = CI->getContext();

  const Function *Callee = CI->getCalledFunction();
  assert(Callee && "Cannot lower an indirect call!");

  CallSite CS(CI);
  switch (Callee->getIntrinsicID()) {
  case Intrinsic::not_intrinsic:
    report_fatal_error("Cannot lower a call to a non-intrinsic function '"+
                      Callee->getName() + "'!");
  default:
    report_fatal_error("Code generator does not support intrinsic function '"+
                      Callee->getName()+"'!");

  case Intrinsic::expect: {
    // Just replace __builtin_expect(exp, c) with EXP.
    Value *V = CI->getArgOperand(0);
    CI->replaceAllUsesWith(V);
    break;
  }

    // The setjmp/longjmp intrinsics should only exist in the code if it was
    // never optimized (i.e., right out of the CFE), or if it has been hacked on
    // by the lowerinvoke pass.  In both cases, the right thing to do is to
    // convert the call to an explicit setjmp or longjmp call.
  case Intrinsic::setjmp: {
    Value *V = ReplaceCallWith("setjmp", CI, CS.arg_begin(), CS.arg_end(),
                               Type::getInt32Ty(Context));
    if (!CI->getType()->isVoidTy())
      CI->replaceAllUsesWith(V);
    break;
  }
  case Intrinsic::sigsetjmp:
     if (!CI->getType()->isVoidTy())
       CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
     break;

  case Intrinsic::longjmp: {
    ReplaceCallWith("longjmp", CI, CS.arg_begin(), CS.arg_end(),
                    Type::getVoidTy(Context));
    break;
  }

  case Intrinsic::siglongjmp: {
    // Insert the call to abort
    ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(), 
                    Type::getVoidTy(Context));
    break;
  }
  case Intrinsic::ctpop:
    CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI));
    break;

  case Intrinsic::bswap:
    CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getArgOperand(0), CI));
    break;
    
  case Intrinsic::ctlz:
    CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI));
    break;

  case Intrinsic::cttz: {
    // cttz(x) -> ctpop(~X & (X-1))
    Value *Src = CI->getArgOperand(0);
    Value *NotSrc = Builder.CreateNot(Src);
    NotSrc->setName(Src->getName() + ".not");
    Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
    SrcM1 = Builder.CreateSub(Src, SrcM1);
    Src = LowerCTPOP(Context, Builder.CreateAnd(NotSrc, SrcM1), CI);
    CI->replaceAllUsesWith(Src);
    break;
  }

  case Intrinsic::stacksave:
  case Intrinsic::stackrestore: {
    if (!Warned)
      errs() << "WARNING: this target does not support the llvm.stack"
             << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
               "save" : "restore") << " intrinsic.\n";
    Warned = true;
    if (Callee->getIntrinsicID() == Intrinsic::stacksave)
      CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
    break;
  }
    
  case Intrinsic::returnaddress:
  case Intrinsic::frameaddress:
    errs() << "WARNING: this target does not support the llvm."
           << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
             "return" : "frame") << "address intrinsic.\n";
    CI->replaceAllUsesWith(ConstantPointerNull::get(
                                            cast<PointerType>(CI->getType())));
    break;

  case Intrinsic::prefetch:
    break;    // Simply strip out prefetches on unsupported architectures

  case Intrinsic::pcmarker:
    break;    // Simply strip out pcmarker on unsupported architectures
  case Intrinsic::readcyclecounter: {
    errs() << "WARNING: this target does not support the llvm.readcyclecoun"
           << "ter intrinsic.  It is being lowered to a constant 0\n";
    CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
    break;
  }

  case Intrinsic::dbg_declare:
    break;    // Simply strip out debugging intrinsics

  case Intrinsic::eh_typeid_for:
    // Return something different to eh_selector.
    CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
    break;

  case Intrinsic::annotation:
  case Intrinsic::ptr_annotation:
    // Just drop the annotation, but forward the value
    CI->replaceAllUsesWith(CI->getOperand(0));
    break;

  case Intrinsic::var_annotation:
    break;   // Strip out annotate intrinsic
    
  case Intrinsic::memcpy: {
    Type *IntPtr = TD.getIntPtrType(Context);
    Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
                                        /* isSigned */ false);
    Value *Ops[3];
    Ops[0] = CI->getArgOperand(0);
    Ops[1] = CI->getArgOperand(1);
    Ops[2] = Size;
    ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
    break;
  }
  case Intrinsic::memmove: {
    Type *IntPtr = TD.getIntPtrType(Context);
    Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
                                        /* isSigned */ false);
    Value *Ops[3];
    Ops[0] = CI->getArgOperand(0);
    Ops[1] = CI->getArgOperand(1);
    Ops[2] = Size;
    ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
    break;
  }
  case Intrinsic::memset: {
    Value *Op0 = CI->getArgOperand(0);
    Type *IntPtr = TD.getIntPtrType(Op0->getType());
    Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
                                        /* isSigned */ false);
    Value *Ops[3];
    Ops[0] = Op0;
    // Extend the amount to i32.
    Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1),
                                   Type::getInt32Ty(Context),
                                   /* isSigned */ false);
    Ops[2] = Size;
    ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
    break;
  }
  case Intrinsic::sqrt: {
    ReplaceFPIntrinsicWithCall(CI, "sqrtf", "sqrt", "sqrtl");
    break;
  }
  case Intrinsic::log: {
    ReplaceFPIntrinsicWithCall(CI, "logf", "log", "logl");
    break;
  }
  case Intrinsic::log2: {
    ReplaceFPIntrinsicWithCall(CI, "log2f", "log2", "log2l");
    break;
  }
  case Intrinsic::log10: {
    ReplaceFPIntrinsicWithCall(CI, "log10f", "log10", "log10l");
    break;
  }
  case Intrinsic::exp: {
    ReplaceFPIntrinsicWithCall(CI, "expf", "exp", "expl");
    break;
  }
  case Intrinsic::exp2: {
    ReplaceFPIntrinsicWithCall(CI, "exp2f", "exp2", "exp2l");
    break;
  }
  case Intrinsic::pow: {
    ReplaceFPIntrinsicWithCall(CI, "powf", "pow", "powl");
    break;
  }
  case Intrinsic::flt_rounds:
     // Lower to "round to the nearest"
     if (!CI->getType()->isVoidTy())
       CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
     break;
  case Intrinsic::invariant_start:
  case Intrinsic::lifetime_start:
    // Discard region information.
    CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
    break;
  case Intrinsic::invariant_end:
  case Intrinsic::lifetime_end:
    // Discard region information.
    break;
  }

  assert(CI->use_empty() &&
         "Lowering should have eliminated any uses of the intrinsic call!");
  CI->eraseFromParent();
}
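The cttz lowering above relies on the identity cttz(x) = ctpop(~x & (x - 1)) for nonzero x: subtracting one turns the trailing zeros into ones and clears the lowest set bit, and masking with ~x keeps only those freshly created ones. A quick standalone check of the identity:

#include <cassert>
#include <cstdint>

static unsigned popcount32(uint32_t X) {
  unsigned N = 0;
  for (; X; X &= X - 1) // clear the lowest set bit each iteration
    ++N;
  return N;
}

// cttz(x) -> ctpop(~x & (x-1)), as lowered above.
static unsigned cttz32(uint32_t X) {
  return popcount32(~X & (X - 1));
}

int main() {
  assert(cttz32(1) == 0);
  assert(cttz32(8) == 3);
  assert(cttz32(12) == 2);           // 0b1100
  assert(cttz32(0x80000000u) == 31);
  return 0;
}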
Example #25
0
Value GranularityRounderPreferredNumbers::roundUp(Value value) {
    uassertNonNegativeNumber(value);

    if (value.coerceToDouble() == 0.0) {
        return value;
    }

    if (value.getType() == BSONType::NumberDecimal) {
        Decimal128 number = value.getDecimal();
        Decimal128 multiplier = Decimal128(1);

        // '_baseSeries' contains doubles, so we create a vector that contains the Decimal128
        // versions of the numbers in '_baseSeries' to make it easier to compare values to 'number'.
        vector<Decimal128> decimalSeries;
        for (auto&& doubleNumber : _baseSeries) {
            decimalSeries.push_back(Decimal128(doubleNumber));
        }

        while (number.isGreaterEqual(decimalSeries.back().multiply(multiplier))) {
            multiplier = multiplier.multiply(Decimal128(10));
        }

        Decimal128 previousMin;
        while (number.isLess(decimalSeries.front().multiply(multiplier))) {
            previousMin = decimalSeries.front().multiply(multiplier);
            multiplier = multiplier.divide(Decimal128(10));
            if (number.isGreaterEqual(decimalSeries.back().multiply(multiplier))) {
                // The number was between the previous min and the current max, so it must round up
                // to the previous min. For example, rounding up 0.8 in the E6 series.
                return Value(previousMin);
            }
        }

        // After scaling up or down, 'number' should now fall into the range spanned by
        // decimalSeries[i] * multiplier for all i in decimalSeries.
        invariant(number.isGreaterEqual(decimalSeries.front().multiply(multiplier)) &&
                  number.isLess(decimalSeries.back().multiply(multiplier)));

        // Get an iterator pointing to the first element in 'decimalSeries' that is
        // greater than 'number'.
        auto iterator =
            std::upper_bound(decimalSeries.begin(),
                             decimalSeries.end(),
                             number,
                             [multiplier](Decimal128 roundingNumber, Decimal128 seriesNumber) {
                                 return roundingNumber.isLess(seriesNumber.multiply(multiplier));
                             });

        return Value((*iterator).multiply(multiplier));
    } else {
        double number = value.coerceToDouble();
        double multiplier = 1.0;

        while (number >= (_baseSeries.back() * multiplier)) {
            multiplier *= 10.0;
        }

        double previousMin;
        while (number < (_baseSeries.front() * multiplier)) {
            previousMin = _baseSeries.front() * multiplier;
            multiplier /= 10.0;
            if (number >= (_baseSeries.back() * multiplier)) {
                // The number was between the previous min and the current max, so it must round up
                // to the previous min. For example, rounding up 0.8 in the E6 series.
                return Value(previousMin);
            }
        }

        // After scaling up or down, 'number' should now fall into the range spanned by
        // _baseSeries[i] * multiplier for all i in _baseSeries.
        invariant(number >= (_baseSeries.front() * multiplier) &&
                  number < (_baseSeries.back() * multiplier));

        // Get an iterator pointing to the first element in '_baseSeries' that is greater
        // than 'number'.
        auto iterator = std::upper_bound(_baseSeries.begin(),
                                         _baseSeries.end(),
                                         number,
                                         [multiplier](double roundingNumber, double seriesNumber) {
                                             return roundingNumber < (seriesNumber * multiplier);
                                         });
        return Value(*iterator * multiplier);
    }
}
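The double branch above scales the base series by powers of ten until it brackets the input, handles the case where the input falls in the gap between two decades, and then takes the first series entry strictly greater than the input. A compact standalone sketch over an illustrative {1, 2, 5} series (not MongoDB's actual series):

#include <algorithm>
#include <vector>

// Round 'number' (> 0) up to the next value of the form s * 10^k, where s
// is drawn from an ascending base series, mirroring roundUp's double path.
static double roundUpPreferred(double number, const std::vector<double> &series) {
  double multiplier = 1.0;
  while (number >= series.back() * multiplier)
    multiplier *= 10.0;               // scale up until the max bounds us
  double previousMin = 0.0;
  while (number < series.front() * multiplier) {
    previousMin = series.front() * multiplier;
    multiplier /= 10.0;               // scale down until the min bounds us
    if (number >= series.back() * multiplier)
      return previousMin;             // fell in the gap between decades
  }
  auto it = std::upper_bound(series.begin(), series.end(), number,
                             [multiplier](double n, double s) {
                               return n < s * multiplier;
                             });
  return *it * multiplier;
}
// e.g. roundUpPreferred(30.0, {1, 2, 5}) == 50.0, and 7.0 rounds up to 10.0
// via the gap case, just like 0.8 in the E6 series mentioned above.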
Example #26
0
bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
  bool MadeChange = false;

  if (!DL)
    return MadeChange;

  // Only prep the innermost loop.
  if (!L->empty())
    return MadeChange;

  BasicBlock *Header = L->getHeader();

  const PPCSubtarget *ST =
    TM ? TM->getSubtargetImpl(*Header->getParent()) : nullptr;

  unsigned HeaderLoopPredCount =
    std::distance(pred_begin(Header), pred_end(Header));

  // Collect buckets of comparable addresses used by loads and stores.
  typedef std::multimap<const SCEV *, Instruction *, SCEVLess> Bucket;
  SmallVector<Bucket, 16> Buckets;
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I) {
    for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
        J != JE; ++J) {
      Value *PtrValue;
      Instruction *MemI;

      if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) {
        MemI = LMemI;
        PtrValue = LMemI->getPointerOperand();
      } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) {
        MemI = SMemI;
        PtrValue = SMemI->getPointerOperand();
      } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(J)) {
        if (IMemI->getIntrinsicID() == Intrinsic::prefetch) {
          MemI = IMemI;
          PtrValue = IMemI->getArgOperand(0);
        } else continue;
      } else continue;

      unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
      if (PtrAddrSpace)
        continue;

      // There are no update forms for Altivec vector loads and stores.
      if (ST && ST->hasAltivec() &&
          PtrValue->getType()->getPointerElementType()->isVectorTy())
        continue;

      if (L->isLoopInvariant(PtrValue))
        continue;

      const SCEV *LSCEV = SE->getSCEV(PtrValue);
      if (!isa<SCEVAddRecExpr>(LSCEV))
        continue;

      bool FoundBucket = false;
      for (unsigned i = 0, e = Buckets.size(); i != e; ++i)
        for (Bucket::iterator K = Buckets[i].begin(), KE = Buckets[i].end();
             K != KE; ++K) {
          const SCEV *Diff = SE->getMinusSCEV(K->first, LSCEV);
          if (isa<SCEVConstant>(Diff)) {
            Buckets[i].insert(std::make_pair(LSCEV, MemI));
            FoundBucket = true;
            break;
          }
        }

      if (!FoundBucket) {
        Buckets.push_back(Bucket(SCEVLess(SE)));
        Buckets.back().insert(std::make_pair(LSCEV, MemI));
      }
    }
  }

  if (Buckets.empty() || Buckets.size() > MaxVars)
    return MadeChange;

  BasicBlock *LoopPredecessor = L->getLoopPredecessor();
  // If there is no loop predecessor, or the loop predecessor's terminator
  // returns a value (which might contribute to determining the loop's
  // iteration space), insert a new preheader for the loop.
  if (!LoopPredecessor ||
      !LoopPredecessor->getTerminator()->getType()->isVoidTy())
    LoopPredecessor = InsertPreheaderForLoop(L, this);
  if (!LoopPredecessor)
    return MadeChange;

  // Track the blocks in which we delete instructions so that any PHIs made
  // dead by the rewrite can be cleaned up afterwards.
  SmallSet<BasicBlock *, 16> BBChanged;
  for (unsigned i = 0, e = Buckets.size(); i != e; ++i) {
    // The base address of each bucket is transformed into a phi and the others
    // are rewritten as offsets of that variable.

    const SCEVAddRecExpr *BasePtrSCEV =
      cast<SCEVAddRecExpr>(Buckets[i].begin()->first);
    if (!BasePtrSCEV->isAffine())
      continue;

    Instruction *MemI = Buckets[i].begin()->second;
    Value *BasePtr = GetPointerOperand(MemI);
    assert(BasePtr && "No pointer operand");

    Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(),
      BasePtr->getType()->getPointerAddressSpace());

    const SCEV *BasePtrStartSCEV = BasePtrSCEV->getStart();
    if (!SE->isLoopInvariant(BasePtrStartSCEV, L))
      continue;

    const SCEVConstant *BasePtrIncSCEV =
      dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE));
    if (!BasePtrIncSCEV)
      continue;
    BasePtrStartSCEV = SE->getMinusSCEV(BasePtrStartSCEV, BasePtrIncSCEV);
    if (!isSafeToExpand(BasePtrStartSCEV, *SE))
      continue;

    PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount,
      MemI->hasName() ? MemI->getName() + ".phi" : "",
      Header->getFirstNonPHI());

    SCEVExpander SCEVE(*SE, "pistart");
    Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
      LoopPredecessor->getTerminator());

    // Note that LoopPredecessor might occur in the predecessor list multiple
    // times, and we need to add it the right number of times.
    for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
         PI != PE; ++PI) {
      if (*PI != LoopPredecessor)
        continue;

      NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
    }

    Instruction *InsPoint = Header->getFirstInsertionPt();
    GetElementPtrInst *PtrInc =
      GetElementPtrInst::Create(NewPHI, BasePtrIncSCEV->getValue(),
        MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint);
    PtrInc->setIsInBounds(IsPtrInBounds(BasePtr));
    for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
         PI != PE; ++PI) {
      if (*PI == LoopPredecessor)
        continue;

      NewPHI->addIncoming(PtrInc, *PI);
    }

    Instruction *NewBasePtr;
    if (PtrInc->getType() != BasePtr->getType())
      NewBasePtr = new BitCastInst(PtrInc, BasePtr->getType(),
        PtrInc->hasName() ? PtrInc->getName() + ".cast" : "", InsPoint);
    else
      NewBasePtr = PtrInc;

    if (Instruction *IDel = dyn_cast<Instruction>(BasePtr))
      BBChanged.insert(IDel->getParent());
    BasePtr->replaceAllUsesWith(NewBasePtr);
    RecursivelyDeleteTriviallyDeadInstructions(BasePtr);

    Value *LastNewPtr = NewBasePtr;
    for (Bucket::iterator I = std::next(Buckets[i].begin()),
         IE = Buckets[i].end(); I != IE; ++I) {
      Value *Ptr = GetPointerOperand(I->second);
      assert(Ptr && "No pointer operand");
      if (Ptr == LastNewPtr)
        continue;

      Instruction *RealNewPtr;
      const SCEVConstant *Diff =
        cast<SCEVConstant>(SE->getMinusSCEV(I->first, BasePtrSCEV));
      if (Diff->isZero()) {
        RealNewPtr = NewBasePtr;
      } else {
        Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
        if (PtrIP && isa<Instruction>(NewBasePtr) &&
            cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
          PtrIP = nullptr;
        else if (PtrIP && isa<PHINode>(PtrIP))
          PtrIP = PtrIP->getParent()->getFirstInsertionPt();
        else if (!PtrIP)
          PtrIP = I->second;

        GetElementPtrInst *NewPtr =
          GetElementPtrInst::Create(PtrInc, Diff->getValue(),
            I->second->hasName() ? I->second->getName() + ".off" : "", PtrIP);
        if (!PtrIP)
          NewPtr->insertAfter(cast<Instruction>(PtrInc));
        NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
        RealNewPtr = NewPtr;
      }

      if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
        BBChanged.insert(IDel->getParent());

      Instruction *ReplNewPtr;
      if (Ptr->getType() != RealNewPtr->getType()) {
        ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(),
          Ptr->hasName() ? Ptr->getName() + ".cast" : "");
        ReplNewPtr->insertAfter(RealNewPtr);
      } else
        ReplNewPtr = RealNewPtr;

      Ptr->replaceAllUsesWith(ReplNewPtr);
      RecursivelyDeleteTriviallyDeadInstructions(Ptr);

      LastNewPtr = RealNewPtr;
    }

    MadeChange = true;
  }

  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I) {
    if (BBChanged.count(*I))
      DeleteDeadPHIs(*I);
  }

  return MadeChange;
}
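
The bucketing loop above keys each access on its address SCEV and joins it to an existing bucket whenever its difference from that bucket's representative is a compile-time constant. A toy standalone sketch of that grouping, modeling each address as a (base id, byte offset) pair so that "differs by a constant" reduces to "same base" (illustrative types, no LLVM dependencies):

#include <cstddef>
#include <cstdint>
#include <map>
#include <vector>

// Toy stand-in for an add-rec SCEV: two addresses differ by a constant
// exactly when they share the same base recurrence.
struct Address {
    int baseId;          // identifies the base pointer + stride recurrence
    int64_t byteOffset;  // constant offset from that base
};

// Keyed on the offset so each bucket stays sorted; the smallest entry can
// then serve as the bucket's base, as in the pass above.
using Bucket = std::multimap<int64_t, size_t /*access index*/>;

// Group accesses so that any two entries in a bucket differ by a constant.
std::vector<Bucket> buildBuckets(const std::vector<Address>& accesses) {
    std::vector<Bucket> buckets;
    std::vector<int> bucketBase;  // baseId of each bucket's representative

    for (size_t i = 0; i != accesses.size(); ++i) {
        bool found = false;
        for (size_t b = 0; b != buckets.size(); ++b) {
            if (bucketBase[b] == accesses[i].baseId) {  // constant difference
                buckets[b].insert({accesses[i].byteOffset, i});
                found = true;
                break;
            }
        }
        if (!found) {
            buckets.emplace_back();
            buckets.back().insert({accesses[i].byteOffset, i});
            bucketBase.push_back(accesses[i].baseId);
        }
    }
    return buckets;
}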
Example #27
0
void Lint::visitCallSite(CallSite CS) {
    Instruction &I = *CS.getInstruction();
    Value *Callee = CS.getCalledValue();

    visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize,
                         0, nullptr, MemRef::Callee);

    if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) {
        Assert1(CS.getCallingConv() == F->getCallingConv(),
                "Undefined behavior: Caller and callee calling convention differ",
                &I);

        FunctionType *FT = F->getFunctionType();
        unsigned NumActualArgs = CS.arg_size();

        Assert1(FT->isVarArg() ?
                FT->getNumParams() <= NumActualArgs :
                FT->getNumParams() == NumActualArgs,
                "Undefined behavior: Call argument count mismatches callee "
                "argument count", &I);

        Assert1(FT->getReturnType() == I.getType(),
                "Undefined behavior: Call return type mismatches "
                "callee return type", &I);

        // Check argument types (in case the callee was cast) and attributes.
        // TODO: Verify that caller and callee attributes are compatible.
        Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
        CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
        for (; AI != AE; ++AI) {
            Value *Actual = *AI;
            if (PI != PE) {
                Argument *Formal = PI++;
                Assert1(Formal->getType() == Actual->getType(),
                        "Undefined behavior: Call argument type mismatches "
                        "callee parameter type", &I);

                // Check that noalias arguments don't alias other arguments. This is
                // not fully precise because we don't know the sizes of the dereferenced
                // memory regions.
                if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
                    for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
                        if (AI != BI && (*BI)->getType()->isPointerTy()) {
                            AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI);
                            Assert1(Result != AliasAnalysis::MustAlias &&
                                    Result != AliasAnalysis::PartialAlias,
                                    "Unusual: noalias argument aliases another argument", &I);
                        }

                // Check that an sret argument points to valid memory.
                if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
                    Type *Ty =
                        cast<PointerType>(Formal->getType())->getElementType();
                    visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty),
                                         DL ? DL->getABITypeAlignment(Ty) : 0,
                                         Ty, MemRef::Read | MemRef::Write);
                }
            }
        }
    }

    if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
        for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
                AI != AE; ++AI) {
            Value *Obj = findValue(*AI, /*OffsetOk=*/true);
            Assert1(!isa<AllocaInst>(Obj),
                    "Undefined behavior: Call with \"tail\" keyword references "
                    "alloca", &I);
        }


    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
        switch (II->getIntrinsicID()) {
        default:
            break;

        // TODO: Check more intrinsics

        case Intrinsic::memcpy: {
            MemCpyInst *MCI = cast<MemCpyInst>(&I);
            // TODO: If the size is known, use it.
            visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize,
                                 MCI->getAlignment(), nullptr,
                                 MemRef::Write);
            visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize,
                                 MCI->getAlignment(), nullptr,
                                 MemRef::Read);

            // Check that the memcpy arguments don't overlap. The AliasAnalysis API
            // isn't expressive enough for what we really want to do. Known partial
            // overlap is not distinguished from the case where nothing is known.
            uint64_t Size = 0;
            if (const ConstantInt *Len =
                        dyn_cast<ConstantInt>(findValue(MCI->getLength(),
                                              /*OffsetOk=*/false)))
                if (Len->getValue().isIntN(32))
                    Size = Len->getValue().getZExtValue();
            Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
                    AliasAnalysis::MustAlias,
                    "Undefined behavior: memcpy source and destination overlap", &I);
            break;
        }
        case Intrinsic::memmove: {
            MemMoveInst *MMI = cast<MemMoveInst>(&I);
            // TODO: If the size is known, use it.
            visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize,
                                 MMI->getAlignment(), nullptr,
                                 MemRef::Write);
            visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize,
                                 MMI->getAlignment(), nullptr,
                                 MemRef::Read);
            break;
        }
        case Intrinsic::memset: {
            MemSetInst *MSI = cast<MemSetInst>(&I);
            // TODO: If the size is known, use it.
            visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize,
                                 MSI->getAlignment(), nullptr,
                                 MemRef::Write);
            break;
        }

        case Intrinsic::vastart:
            Assert1(I.getParent()->getParent()->isVarArg(),
                    "Undefined behavior: va_start called in a non-varargs function",
                    &I);

            visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
                                 0, nullptr, MemRef::Read | MemRef::Write);
            break;
        case Intrinsic::vacopy:
            visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
                                 0, nullptr, MemRef::Write);
            visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize,
                                 0, nullptr, MemRef::Read);
            break;
        case Intrinsic::vaend:
            visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
                                 0, nullptr, MemRef::Read | MemRef::Write);
            break;

        case Intrinsic::stackrestore:
            // Stackrestore doesn't read or write memory, but it sets the
            // stack pointer, which the compiler may read from or write to
            // at any time, so check it for both readability and writeability.
            visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
                                 0, nullptr, MemRef::Read | MemRef::Write);
            break;
        }
}
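
As the memcpy comment above concedes, the AliasAnalysis query can only flag the case where source and destination must fully alias; known partial overlap is indistinguishable from "nothing known". With concrete addresses, the ideal check is plain interval overlap. A self-contained sketch of that idealized test (hypothetical helper, not part of Lint):

#include <cstdint>

// Report a definite overlap between [src, src+size) and [dst, dst+size).
// A size of 0 models AliasAnalysis::UnknownSize: with no length we cannot
// prove anything, so stay silent. Assumes the ranges do not wrap around.
bool memcpyRangesOverlap(uint64_t src, uint64_t dst, uint64_t size) {
    if (size == 0)
        return false;
    return src < dst + size && dst < src + size;
}

// Usage: memcpyRangesOverlap(0x1000, 0x1004, 8) is true (a partial overlap,
// which the MustAlias-based check above cannot report), while
// memcpyRangesOverlap(0x1000, 0x2000, 8) is false.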
Example #28
0
/// Test if the given instruction is in a position to be optimized
/// with a tail-call. This roughly means that it's in a block with
/// a return and there's nothing that needs to be scheduled
/// between it and the return.
///
/// This function only tests target-independent requirements.
bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
                                const TargetLowering &TLI) {
  const Instruction *I = CS.getInstruction();
  const BasicBlock *ExitBB = I->getParent();
  const TerminatorInst *Term = ExitBB->getTerminator();
  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);

  // The block must end in a return statement or unreachable.
  //
  // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
  // an unreachable, for now. The way tailcall optimization is currently
  // implemented means it will add an epilogue followed by a jump. That is
  // not profitable. Also, if the callee is a special function (e.g.
  // longjmp on x86), it can end up causing miscompilation that has not
  // been fully understood.
  if (!Ret &&
      (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt ||
       !isa<UnreachableInst>(Term)))
    return false;

  // If I will have a chain, make sure no other instruction that will have a
  // chain interposes between I and the return.
  if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
      !isSafeToSpeculativelyExecute(I))
    for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
         --BBI) {
      if (&*BBI == I)
        break;
      // Debug info intrinsics do not get in the way of tail call optimization.
      if (isa<DbgInfoIntrinsic>(BBI))
        continue;
      if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
          !isSafeToSpeculativelyExecute(BBI))
        return false;
    }

  // If the block ends with a void return or unreachable, it doesn't matter
  // what the call's return type is.
  if (!Ret || Ret->getNumOperands() == 0) return true;

  // If the return value is undef, it doesn't matter what the call's
  // return type is.
  if (isa<UndefValue>(Ret->getOperand(0))) return true;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore noalias because it doesn't affect the call sequence.
  const Function *F = ExitBB->getParent();
  Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
  if (AttrBuilder(CalleeRetAttr).removeAttribute(Attributes::NoAlias) !=
      AttrBuilder(CallerRetAttr).removeAttribute(Attributes::NoAlias))
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerRetAttr.hasAttribute(Attributes::ZExt) ||
      CallerRetAttr.hasAttribute(Attributes::SExt))
    return false;

  // Otherwise, make sure the unmodified return value of I is the return value.
  // We handle two cases: scalars, and aggregates built up from multiple return
  // values via insertvalue.
  Value *RetVal = Ret->getOperand(0);
  if (!isa<InsertValueInst>(RetVal) || !isa<StructType>(RetVal->getType()))
    // Handle scalars first.
    return getNoopInput(Ret->getOperand(0), TLI) == I;

  // If this is an aggregate return, look through the insert/extract values and
  // see if each is transparent.
  for (unsigned i = 0, e = cast<StructType>(RetVal->getType())->getNumElements();
       i != e; ++i) {
    const Value *InScalar = FindInsertedValue(RetVal, i);
    if (InScalar == 0) return false;
    InScalar = getNoopInput(InScalar, TLI);

    // If the scalar value being inserted is an extractvalue of the right index
    // from the call, then everything is good.
    const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(InScalar);
    if (EVI == 0 || EVI->getOperand(0) != I || EVI->getNumIndices() != 1 ||
        EVI->getIndices()[0] != i)
      return false;
  }

  return true;
}
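
The backward scan above is the core of the target-independent test: starting just before the return, no non-debug instruction with side effects or memory reads may interpose between the call and the return. A toy model of that scan over a flat instruction list (illustrative structs, not LLVM's):

#include <cstddef>
#include <vector>

struct ToyInst {
    bool isDebugIntrinsic;  // debug info never blocks tail calls
    bool hasSideEffects;    // mayHaveSideEffects() analogue
    bool readsMemory;       // mayReadFromMemory() analogue
};

// Returns true when no non-debug instruction between block[callIdx] and the
// final return (block.back()) has side effects or reads memory. Assumes the
// block ends in a return and callIdx < block.size() - 1.
bool nothingInterposes(const std::vector<ToyInst>& block, size_t callIdx) {
    // Walk backward from the instruction just before the return down to,
    // but not including, the call itself.
    for (size_t i = block.size() - 2; i > callIdx; --i) {
        if (block[i].isDebugIntrinsic)
            continue;
        if (block[i].hasSideEffects || block[i].readsMemory)
            return false;
    }
    return true;
}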
Example #29
0
/// DoPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function.  At this point, we know that it's
/// safe to do so.
CallGraphNode *
ArgPromotion::DoPromotion(Function *F,
                          SmallPtrSet<Argument*, 8> &ArgsToPromote,
                          SmallPtrSet<Argument*, 8> &ByValArgsToTransform) {

  // Start by computing a new prototype for the function, which is the same as
  // the old function, but has modified arguments.
  FunctionType *FTy = F->getFunctionType();
  std::vector<Type*> Params;

  typedef std::set<IndicesVector> ScalarizeTable;

  // ScalarizedElements - If we are promoting a pointer that has elements
  // accessed out of it, keep track of which elements are accessed so that we
  // can add one argument for each.
  //
  // Arguments that are directly loaded will have a zero element value here, to
  // handle cases where there are both a direct load and GEP accesses.
  //
  std::map<Argument*, ScalarizeTable> ScalarizedElements;

  // OriginalLoads - Keep track of a representative load instruction from the
  // original function so that we can tell the alias analysis implementation
  // what the new GEP/Load instructions we are inserting look like.
  std::map<IndicesVector, LoadInst*> OriginalLoads;

  // AttributesVec - Keep track of the parameter attributes for the arguments
  // that we are *not* promoting. For the ones that we do promote, the
  // parameter attributes are lost.
  SmallVector<AttributeSet, 8> AttributesVec;
  const AttributeSet &PAL = F->getAttributes();

  // Add any return attributes.
  if (PAL.hasAttributes(AttributeSet::ReturnIndex))
    AttributesVec.push_back(AttributeSet::get(F->getContext(),
                                              PAL.getRetAttributes()));

  // First, determine the new argument list
  unsigned ArgIndex = 1;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
       ++I, ++ArgIndex) {
    if (ByValArgsToTransform.count(I)) {
      // Simple byval argument? Just add all the struct element types.
      Type *AgTy = cast<PointerType>(I->getType())->getElementType();
      StructType *STy = cast<StructType>(AgTy);
      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
        Params.push_back(STy->getElementType(i));
      ++NumByValArgsPromoted;
    } else if (!ArgsToPromote.count(I)) {
      // Unchanged argument
      Params.push_back(I->getType());
      AttributeSet attrs = PAL.getParamAttributes(ArgIndex);
      if (attrs.hasAttributes(ArgIndex)) {
        AttrBuilder B(attrs, ArgIndex);
        AttributesVec.
          push_back(AttributeSet::get(F->getContext(), Params.size(), B));
      }
    } else if (I->use_empty()) {
      // Dead argument: these are always marked as promotable.
      ++NumArgumentsDead;
    } else {
      // Okay, this is being promoted. This means that the only uses are loads
      // or GEPs which are only used by loads

      // In this table, we will track which indices are loaded from the argument
      // (where direct loads are tracked as no indices).
      ScalarizeTable &ArgIndices = ScalarizedElements[I];
      for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
           ++UI) {
        Instruction *User = cast<Instruction>(*UI);
        assert(isa<LoadInst>(User) || isa<GetElementPtrInst>(User));
        IndicesVector Indices;
        Indices.reserve(User->getNumOperands() - 1);
        // Since loads will only have a single operand, and GEPs only a single
        // non-index operand, this will record direct loads without any indices,
        // and gep+loads with the GEP indices.
        for (User::op_iterator II = User->op_begin() + 1, IE = User->op_end();
             II != IE; ++II)
          Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
        // GEPs with a single 0 index can be merged with direct loads
        if (Indices.size() == 1 && Indices.front() == 0)
          Indices.clear();
        ArgIndices.insert(Indices);
        LoadInst *OrigLoad;
        if (LoadInst *L = dyn_cast<LoadInst>(User))
          OrigLoad = L;
        else
          // Take any load; we only use it to update the alias analysis.
          OrigLoad = cast<LoadInst>(User->use_back());
        OriginalLoads[Indices] = OrigLoad;
      }

      // Add a parameter to the function for each element passed in.
      for (ScalarizeTable::iterator SI = ArgIndices.begin(),
             E = ArgIndices.end(); SI != E; ++SI) {
        // not allowed to dereference ->begin() if size() is 0
        Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), *SI));
        assert(Params.back());
      }

      if (ArgIndices.size() == 1 && ArgIndices.begin()->empty())
        ++NumArgumentsPromoted;
      else
        ++NumAggregatesPromoted;
    }
  }

  // Add any function attributes.
  if (PAL.hasAttributes(AttributeSet::FunctionIndex))
    AttributesVec.push_back(AttributeSet::get(FTy->getContext(),
                                              PAL.getFnAttributes()));

  Type *RetTy = FTy->getReturnType();

  // Construct the new function type using the new arguments.
  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());

  // Create the new function body and insert it into the module.
  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
  NF->copyAttributesFrom(F);

  
  DEBUG(dbgs() << "ARG PROMOTION:  Promoting to:" << *NF << "\n"
        << "From: " << *F);
  
  // Recompute the parameter attributes list based on the new arguments for
  // the function.
  NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec));
  AttributesVec.clear();

  F->getParent()->getFunctionList().insert(F, NF);
  NF->takeName(F);

  // Get the alias analysis information that we need to update to reflect our
  // changes.
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();

  // Get the callgraph information that we need to update to reflect our
  // changes.
  CallGraph &CG = getAnalysis<CallGraph>();
  
  // Get a new callgraph node for NF.
  CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);

  // Loop over all of the callers of the function, transforming the call sites
  // to pass in the loaded pointers.
  //
  SmallVector<Value*, 16> Args;
  while (!F->use_empty()) {
    CallSite CS(F->use_back());
    assert(CS.getCalledFunction() == F);
    Instruction *Call = CS.getInstruction();
    const AttributeSet &CallPAL = CS.getAttributes();

    // Add any return attributes.
    if (CallPAL.hasAttributes(AttributeSet::ReturnIndex))
      AttributesVec.push_back(AttributeSet::get(F->getContext(),
                                                CallPAL.getRetAttributes()));

    // Loop over the operands, inserting GEP and loads in the caller as
    // appropriate.
    CallSite::arg_iterator AI = CS.arg_begin();
    ArgIndex = 1;
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
         I != E; ++I, ++AI, ++ArgIndex)
      if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
        Args.push_back(*AI);          // Unmodified argument

        if (CallPAL.hasAttributes(ArgIndex)) {
          AttrBuilder B(CallPAL, ArgIndex);
          AttributesVec.
            push_back(AttributeSet::get(F->getContext(), Args.size(), B));
        }
      } else if (ByValArgsToTransform.count(I)) {
        // Emit a GEP and load for each element of the struct.
        Type *AgTy = cast<PointerType>(I->getType())->getElementType();
        StructType *STy = cast<StructType>(AgTy);
        Value *Idxs[2] = {
              ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
          Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
          Value *Idx = GetElementPtrInst::Create(*AI, Idxs,
                                                 (*AI)->getName()+"."+utostr(i),
                                                 Call);
          // TODO: Tell AA about the new values?
          Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
        }
      } else if (!I->use_empty()) {
        // Non-dead argument: insert GEPs and loads as appropriate.
        ScalarizeTable &ArgIndices = ScalarizedElements[I];
        // Store the Value* version of the indices in here, but declare it now
        // for reuse.
        std::vector<Value*> Ops;
        for (ScalarizeTable::iterator SI = ArgIndices.begin(),
               E = ArgIndices.end(); SI != E; ++SI) {
          Value *V = *AI;
          LoadInst *OrigLoad = OriginalLoads[*SI];
          if (!SI->empty()) {
            Ops.reserve(SI->size());
            Type *ElTy = V->getType();
            for (IndicesVector::const_iterator II = SI->begin(),
                 IE = SI->end(); II != IE; ++II) {
              // Use i32 to index structs, and i64 for others (pointers/arrays).
              // This satisfies GEP constraints.
              Type *IdxTy = (ElTy->isStructTy() ?
                    Type::getInt32Ty(F->getContext()) : 
                    Type::getInt64Ty(F->getContext()));
              Ops.push_back(ConstantInt::get(IdxTy, *II));
              // Keep track of the type we're currently indexing.
              ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
            }
            // And create a GEP to extract those indices.
            V = GetElementPtrInst::Create(V, Ops, V->getName()+".idx", Call);
            Ops.clear();
            AA.copyValue(OrigLoad->getOperand(0), V);
          }
          // Since we're replacing a load make sure we take the alignment
          // of the previous load.
          LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
          newLoad->setAlignment(OrigLoad->getAlignment());
          // Transfer the TBAA info too.
          newLoad->setMetadata(LLVMContext::MD_tbaa,
                               OrigLoad->getMetadata(LLVMContext::MD_tbaa));
          Args.push_back(newLoad);
          AA.copyValue(OrigLoad, Args.back());
        }
      }

    // Push any varargs arguments on the list.
    for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
      Args.push_back(*AI);
      if (CallPAL.hasAttributes(ArgIndex)) {
        AttrBuilder B(CallPAL, ArgIndex);
        AttributesVec.
          push_back(AttributeSet::get(F->getContext(), Args.size(), B));
      }
    }

    // Add any function attributes.
    if (CallPAL.hasAttributes(AttributeSet::FunctionIndex))
      AttributesVec.push_back(AttributeSet::get(Call->getContext(),
                                                CallPAL.getFnAttributes()));

    Instruction *New;
    if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
      New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                               Args, "", Call);
      cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
      cast<InvokeInst>(New)->setAttributes(AttributeSet::get(II->getContext(),
                                                            AttributesVec));
    } else {
      New = CallInst::Create(NF, Args, "", Call);
      cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
      cast<CallInst>(New)->setAttributes(AttributeSet::get(New->getContext(),
                                                          AttributesVec));
      if (cast<CallInst>(Call)->isTailCall())
        cast<CallInst>(New)->setTailCall();
    }
    Args.clear();
    AttributesVec.clear();

    // Update the alias analysis implementation to know that we are replacing
    // the old call with a new one.
    AA.replaceWithNewValue(Call, New);

    // Update the callgraph to know that the callsite has been transformed.
    CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
    CalleeNode->replaceCallEdge(Call, New, NF_CGN);

    if (!Call->use_empty()) {
      Call->replaceAllUsesWith(New);
      New->takeName(Call);
    }

    // Finally, remove the old call from the program, reducing the use-count of
    // F.
    Call->eraseFromParent();
  }

  // Since we have now created the new function, splice the body of the old
  // function right into the new function, leaving the old rotting hulk of the
  // function empty.
  NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());

  // Loop over the argument list, transferring uses of the old arguments over to
  // the new arguments, also transferring over the names as well.
  //
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
       I2 = NF->arg_begin(); I != E; ++I) {
    if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
      // If this is an unmodified argument, move the name and users over to the
      // new version.
      I->replaceAllUsesWith(I2);
      I2->takeName(I);
      AA.replaceWithNewValue(I, I2);
      ++I2;
      continue;
    }

    if (ByValArgsToTransform.count(I)) {
      // In the callee, we create an alloca, and store each of the new incoming
      // arguments into the alloca.
      Instruction *InsertPt = NF->begin()->begin();

      // Just add all the struct element types.
      Type *AgTy = cast<PointerType>(I->getType())->getElementType();
      Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt);
      StructType *STy = cast<StructType>(AgTy);
      Value *Idxs[2] = {
            ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };

      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
        Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
        Value *Idx = 
          GetElementPtrInst::Create(TheAlloca, Idxs,
                                    TheAlloca->getName()+"."+Twine(i), 
                                    InsertPt);
        I2->setName(I->getName()+"."+Twine(i));
        new StoreInst(I2++, Idx, InsertPt);
      }

      // Anything that used the arg should now use the alloca.
      I->replaceAllUsesWith(TheAlloca);
      TheAlloca->takeName(I);
      AA.replaceWithNewValue(I, TheAlloca);
      continue;
    }

    if (I->use_empty()) {
      AA.deleteValue(I);
      continue;
    }

    // Otherwise, if we promoted this argument, then all users are load
    // instructions (or GEPs with only load users), and all loads should be
    // using the new argument that we added.
    ScalarizeTable &ArgIndices = ScalarizedElements[I];

    while (!I->use_empty()) {
      if (LoadInst *LI = dyn_cast<LoadInst>(I->use_back())) {
        assert(ArgIndices.begin()->empty() &&
               "Load element should sort to front!");
        I2->setName(I->getName()+".val");
        LI->replaceAllUsesWith(I2);
        AA.replaceWithNewValue(LI, I2);
        LI->eraseFromParent();
        DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
              << "' in function '" << F->getName() << "'\n");
      } else {
        GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back());
        IndicesVector Operands;
        Operands.reserve(GEP->getNumIndices());
        for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
             II != IE; ++II)
          Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());

        // GEPs with a single 0 index can be merged with direct loads
        if (Operands.size() == 1 && Operands.front() == 0)
          Operands.clear();

        Function::arg_iterator TheArg = I2;
        for (ScalarizeTable::iterator It = ArgIndices.begin();
             *It != Operands; ++It, ++TheArg) {
          assert(It != ArgIndices.end() && "GEP not handled??");
        }

        std::string NewName = I->getName();
        for (unsigned i = 0, e = Operands.size(); i != e; ++i)
          NewName += "." + utostr(Operands[i]);
        NewName += ".val";
        TheArg->setName(NewName);

        DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
              << "' of function '" << NF->getName() << "'\n");

        // All of the uses must be load instructions.  Replace them all with
        // the argument specified by ArgNo.
        while (!GEP->use_empty()) {
          LoadInst *L = cast<LoadInst>(GEP->use_back());
          L->replaceAllUsesWith(TheArg);
          AA.replaceWithNewValue(L, TheArg);
          L->eraseFromParent();
        }
        AA.deleteValue(GEP);
        GEP->eraseFromParent();
      }
    }

    // Increment I2 past all of the arguments added for this promoted pointer.
    std::advance(I2, ArgIndices.size());
  }

  // Tell the alias analysis that the old function is about to disappear.
  AA.replaceWithNewValue(F, NF);

  
  NF_CGN->stealCalledFunctionsFrom(CG[F]);
  
  // Now that the old function is dead, delete it.  If there is a dangling
  // reference to the CallgraphNode, just leave the dead function around for
  // someone else to nuke.
  CallGraphNode *CGN = CG[F];
  if (CGN->getNumReferences() == 0)
    delete CG.removeFunctionFromModule(CGN);
  else
    F->setLinkage(Function::ExternalLinkage);
  
  return NF_CGN;
}
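
One detail worth isolating from the walk above: both when collecting accessed indices and when rewriting GEPs in the callee, a one-element index vector of [0] is canonicalized to the empty vector, so a "gep 0" access and a direct load land on the same promoted argument, with the direct load sorting to the front of the table. A minimal sketch of that canonicalization and the set-based dedup (toy types, not the pass's):

#include <cstdint>
#include <set>
#include <vector>

using IndicesVector = std::vector<int64_t>;

// A GEP with the single index 0 addresses the same memory as a direct load
// of the pointer, so both map to the empty index vector.
IndicesVector canonicalize(IndicesVector indices) {
    if (indices.size() == 1 && indices.front() == 0)
        indices.clear();
    return indices;
}

// One promoted scalar argument per distinct canonical index vector. The
// empty vector, if present, sorts first under std::set's lexicographic
// order, which is what the "Load element should sort to front!" assertion
// in the callee rewrite relies on.
std::set<IndicesVector> collectAccesses(const std::vector<IndicesVector>& raw) {
    std::set<IndicesVector> table;
    for (const IndicesVector& indices : raw)
        table.insert(canonicalize(indices));
    return table;
}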
Example #30
0
static bool processPHI(PHINode *P, LazyValueInfo *LVI) {
  bool Changed = false;

  BasicBlock *BB = P->getParent();
  for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
    Value *Incoming = P->getIncomingValue(i);
    if (isa<Constant>(Incoming)) continue;

    Value *V = LVI->getConstantOnEdge(Incoming, P->getIncomingBlock(i), BB, P);

    // See if the incoming value is a select with a scalar condition for which
    // LVI can tell us the value. In that case replace the incoming value with
    // the appropriate value of the select. This often allows us to remove the
    // select later.
    if (!V) {
      SelectInst *SI = dyn_cast<SelectInst>(Incoming);
      if (!SI) continue;

      Value *Condition = SI->getCondition();
      if (!Condition->getType()->isVectorTy()) {
        if (Constant *C = LVI->getConstantOnEdge(
                Condition, P->getIncomingBlock(i), BB, P)) {
          if (C->isOneValue()) {
            V = SI->getTrueValue();
          } else if (C->isZeroValue()) {
            V = SI->getFalseValue();
          }
          // Once LVI learns to handle vector types, we could also add support
          // for vector type constants that are not all zeroes or all ones.
        }
      }

      // See if the select has a constant but LVI tells us that the incoming
      // value can never be that constant. In that case replace the incoming
      // value with the other value of the select. This often allows us to
      // remove the select later.
      if (!V) {
        Constant *C = dyn_cast<Constant>(SI->getFalseValue());
        if (!C) continue;

        if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C,
              P->getIncomingBlock(i), BB, P) !=
            LazyValueInfo::False)
          continue;
        V = SI->getTrueValue();
      }

      DEBUG(dbgs() << "CVP: Threading PHI over " << *SI << '\n');
    }

    P->setIncomingValue(i, V);
    Changed = true;
  }

  // FIXME: Provide TLI, DT, AT to SimplifyInstruction.
  const DataLayout &DL = BB->getModule()->getDataLayout();
  if (Value *V = SimplifyInstruction(P, DL)) {
    P->replaceAllUsesWith(V);
    P->eraseFromParent();
    Changed = true;
  }

  if (Changed)
    ++NumPhis;

  return Changed;
}
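
The threading decision above boils down to a small table: if LVI knows the select's condition on the incoming edge, take that arm; failing that, if LVI can prove the incoming value is never equal to the select's constant false arm, the true arm must be the one that flows in. A standalone sketch of just that decision (a toy tri-state enum standing in for LVI's edge queries):

#include <optional>

enum class EdgeFact { True, False, Unknown };

struct ToySelect {
    int trueValue;
    int falseValue;
};

// Mirrors the PHI-threading decision: 'conditionOnEdge' is what LVI knows
// about the select's condition on the incoming edge, and
// 'incomingEqualsFalseArm' is whether the incoming value can equal the
// select's (constant) false arm on that edge.
std::optional<int> threadSelect(const ToySelect& sel,
                                EdgeFact conditionOnEdge,
                                EdgeFact incomingEqualsFalseArm) {
    if (conditionOnEdge == EdgeFact::True)
        return sel.trueValue;
    if (conditionOnEdge == EdgeFact::False)
        return sel.falseValue;
    if (incomingEqualsFalseArm == EdgeFact::False)  // provably never equal
        return sel.trueValue;
    return std::nullopt;  // no edge information; leave the PHI operand alone
}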