Example #1
0
/// Escape RegNode so that we can access it from child handlers. Find the call
/// to localescape, if any, in the entry block and append RegNode to the list
/// of arguments.
///
/// \param F the function whose entry block is searched/rewritten.
/// \return the index of RegNode in the (new) escape list; callers can pass
///         this index to llvm.localrecover to find RegNode again.
int WinEHStatePass::escapeRegNode(Function &F) {
    // Find the call to localescape and extract its arguments.
    IntrinsicInst *EscapeCall = nullptr;
    for (Instruction &I : F.getEntryBlock()) {
        IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
        if (II && II->getIntrinsicID() == Intrinsic::localescape) {
            EscapeCall = II;
            // Stop at the first match; localescape appears at most once per
            // function, always in the entry block.
            break;
        }
    }
    // Carry over any values that were already escaped so they stay escaped.
    SmallVector<Value *, 8> Args;
    if (EscapeCall) {
        auto Ops = EscapeCall->arg_operands();
        Args.append(Ops.begin(), Ops.end());
    }
    Args.push_back(RegNode);

    // Replace the call (if it exists) with new one. Otherwise, insert at the end
    // of the entry block. (The ternary relies on the implicit Instruction* ->
    // iterator conversion of this LLVM version's ilist iterators.)
    IRBuilder<> Builder(&F.getEntryBlock(),
                        EscapeCall ? EscapeCall : F.getEntryBlock().end());
    Builder.CreateCall(FrameEscape, Args);
    if (EscapeCall)
        EscapeCall->eraseFromParent();
    // RegNode was appended last, so its index is the new argument count - 1.
    return Args.size() - 1;
}
Example #2
0
File: Lint.cpp  Project: IanLee1521/ares
/// Walk backwards from \p InstRbegin (and recursively through predecessors)
/// verifying that every path into this point passed an eh.begincatch before
/// any eh.endcatch or landingpad. On a double-endcatch, the offending call is
/// reported through \p SecondEndCatch.
static bool allPredCameFromBeginCatch(
    BasicBlock *BB, BasicBlock::reverse_iterator InstRbegin,
    IntrinsicInst **SecondEndCatch, SmallSet<BasicBlock *, 4> &VisitedBlocks) {
  VisitedBlocks.insert(BB);
  // Scan this block in reverse for a begincatch.
  for (BasicBlock::reverse_iterator It = InstRbegin, End = BB->rend();
       It != End; ++It) {
    if (IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(&*It)) {
      Intrinsic::ID ID = Intr->getIntrinsicID();
      // Found the opening begincatch: this path is well formed.
      if (ID == Intrinsic::eh_begincatch)
        return true;
      // Hitting another endcatch before the begincatch is an error; report
      // the offending call to the caller.
      if (ID == Intrinsic::eh_endcatch) {
        *SecondEndCatch = Intr;
        return false;
      }
    }
    // Reaching the landingpad means no begincatch exists on this path.
    if (isa<LandingPadInst>(*It))
      return false;
  }
  // Running out of predecessors without seeing a begincatch is a failure.
  if (pred_empty(BB))
    return false;
  // Otherwise every not-yet-visited predecessor path must also check out.
  for (BasicBlock *Pred : predecessors(BB)) {
    if (VisitedBlocks.count(Pred))
      continue;
    if (!allPredCameFromBeginCatch(Pred, Pred->rbegin(), SecondEndCatch,
                                   VisitedBlocks))
      return false;
  }
  return true;
}
Example #3
0
File: Lint.cpp  Project: IanLee1521/ares
/// Walk forward from \p InstBegin (and recursively through successors)
/// verifying that every path reaches an eh.endcatch before another
/// eh.begincatch. A nested begincatch is reported via \p SecondBeginCatch.
static bool
allSuccessorsReachEndCatch(BasicBlock *BB, BasicBlock::iterator InstBegin,
                           IntrinsicInst **SecondBeginCatch,
                           SmallSet<BasicBlock *, 4> &VisitedBlocks) {
  VisitedBlocks.insert(BB);
  // Scan the rest of this block for an endcatch.
  for (BasicBlock::iterator It = InstBegin, End = BB->end(); It != End; ++It) {
    IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(It);
    if (!Intr)
      continue;
    Intrinsic::ID ID = Intr->getIntrinsicID();
    // Found the closing endcatch: this path is well formed.
    if (ID == Intrinsic::eh_endcatch)
      return true;
    // A second begincatch while still inside the catch region is an error;
    // report the offending call to the caller.
    if (ID == Intrinsic::eh_begincatch) {
      *SecondBeginCatch = Intr;
      return false;
    }
  }

  // Falling off the CFG without an endcatch is a failure.
  if (succ_empty(BB))
    return false;
  // Otherwise every not-yet-visited successor path must also check out.
  for (BasicBlock *Succ : successors(BB)) {
    if (VisitedBlocks.count(Succ))
      continue;
    if (!allSuccessorsReachEndCatch(Succ, Succ->begin(), SecondBeginCatch,
                                    VisitedBlocks))
      return false;
  }
  return true;
}
Example #4
0
/// Try to optimize a call site during CodeGenPrepare: expand inline asm when
/// the target can lower it, fold llvm.objectsize.* to a constant, and
/// simplify fortified (_chk) library calls with the default "unknown size"
/// argument. Returns true if the call was changed or removed.
bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
    BasicBlock *BB = CI->getParent();

    // Lower inline assembly if we can.
    // If we found an inline asm expession, and if the target knows how to
    // lower it to normal LLVM code, do so now.
    if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
        if (TLI->ExpandInlineAsm(CI)) {
            // Avoid invalidating the iterator.
            CurInstIterator = BB->begin();
            // Avoid processing instructions out of order, which could cause
            // reuse before a value is defined.
            SunkAddrs.clear();
            return true;
        }
        // Sink address computing for memory operands into the block.
        if (OptimizeInlineAsmInst(CI))
            return true;
    }

    // Lower all uses of llvm.objectsize.*
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
    if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
        // The second argument selects "min" semantics: fold to 0 for min,
        // otherwise to -1 (unknown maximum).
        bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
        Type *ReturnTy = CI->getType();
        Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);

        // Substituting this can cause recursive simplifications, which can
        // invalidate our iterator.  Use a WeakVH to hold onto it in case this
        // happens.
        WeakVH IterHandle(CurInstIterator);

        ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0,
                                  TLInfo, ModifiedDT ? 0 : DT);

        // If the iterator instruction was recursively deleted, start over at the
        // start of the block.
        if (IterHandle != CurInstIterator) {
            CurInstIterator = BB->begin();
            SunkAddrs.clear();
        }
        return true;
    }

    // From here on out we're working with named functions.
    if (CI->getCalledFunction() == 0) return false;

    // We'll need TargetData from here on out.
    const TargetData *TD = TLI ? TLI->getTargetData() : 0;
    if (!TD) return false;

    // Lower all default uses of _chk calls.  This is very similar
    // to what InstCombineCalls does, but here we are only lowering calls
    // that have the default "don't know" as the objectsize.  Anything else
    // should be left alone.
    CodeGenPrepareFortifiedLibCalls Simplifier;
    return Simplifier.fold(CI, TD);
}
Example #5
0
/// Record the points-to / alias effects of an intrinsic call in the Dyck
/// alias graph. \p inst must actually be an IntrinsicInst; cast<> below
/// asserts this in debug builds, unlike the C-style cast it replaces.
void AAAnalyzer::handle_instrinsic(Instruction *inst) {
    // cast<> instead of a C-style cast: intent is checked, not assumed.
    IntrinsicInst * call = cast<IntrinsicInst>(inst);
    switch (call->getIntrinsicID()) {
            // Variable Argument Handling Intrinsics
        case Intrinsic::vastart:
        {
            // va_start writes through this va_list pointer; ensure it is
            // represented in the graph.
            Value * va_list_ptr = call->getArgOperand(0);
            wrapValue(va_list_ptr);
        }
            break;
        case Intrinsic::vaend:
        {
            // va_end has no points-to effect; nothing to record.
        }
            break;
        case Intrinsic::vacopy: // the same with memmove/memcpy

            //Standard C Library Intrinsics
        case Intrinsic::memmove:
        case Intrinsic::memcpy:
        {
            // Per the LLVM langref, operand 0 is the destination and
            // operand 1 the source. (The previous local names had the two
            // swapped, which was misleading; the modeled effect is the same
            // because the pointees are simply made aliases.)
            Value * dst_ptr = call->getArgOperand(0);
            Value * src_ptr = call->getArgOperand(1);

            DyckVertex* dst_ptr_ver = wrapValue(dst_ptr);
            DyckVertex* src_ptr_ver = wrapValue(src_ptr);

            DyckVertex* dst_ver = addPtrTo(dst_ptr_ver, NULL);
            DyckVertex* src_ver = addPtrTo(src_ptr_ver, NULL);

            // After the copy, the destination pointee may hold the same data
            // as the source pointee: treat the two as aliases.
            makeAlias(dst_ver, src_ver);
        }
            break;
        case Intrinsic::memset:
        {
            // memset stores the byte 'val' through 'ptr'; model this as the
            // pointee of ptr aliasing the stored value.
            Value * ptr = call->getArgOperand(0);
            Value * val = call->getArgOperand(1);
            addPtrTo(wrapValue(ptr), wrapValue(val));
        }
            break;
            /// @todo other C lib intrinsics

            //Accurate Garbage Collection Intrinsics
            //Code Generator Intrinsics
            //Bit Manipulation Intrinsics
            //Exception Handling Intrinsics
            //Trampoline Intrinsics
            //Memory Use Markers
            //General Intrinsics

            //Arithmetic with Overflow Intrinsics
            //Specialised Arithmetic Intrinsics
            //Half Precision Floating Point Intrinsics
            //Debugger Intrinsics
        default:break;
    }
}
Example #6
0
/// Replace a narrow (sub-i32) bitreverse with its i32 equivalent: zero-extend
/// the operand, bitreverse at 32 bits, shift the interesting bits back down,
/// and truncate to the original type. Always succeeds and erases \p I.
bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
    IntrinsicInst &I) const {
  assert(I.getIntrinsicID() == Intrinsic::bitreverse &&
         "I must be bitreverse intrinsic");
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getType());
  Function *I32Decl =
      Intrinsic::getDeclaration(Mod, Intrinsic::bitreverse, { I32Ty });

  // Widen, reverse at 32 bits, then realign: reversing zero-extended input
  // leaves the result in the high bits, so shift it back down by the number
  // of padding bits before truncating.
  Value *Widened = Builder.CreateZExt(I.getOperand(0), I32Ty);
  Value *Reversed = Builder.CreateCall(I32Decl, { Widened });
  Value *Realigned =
      Builder.CreateLShr(Reversed, 32 - getBaseElementBitWidth(I.getType()));
  Value *Narrowed = Builder.CreateTrunc(Realigned, I.getType());

  I.replaceAllUsesWith(Narrowed);
  I.eraseFromParent();

  return true;
}
Example #7
0
/// getStoredPointerOperand - Return the pointer that is being written to.
static Value *getStoredPointerOperand(Instruction *I) {
  if (StoreInst *Store = dyn_cast<StoreInst>(I))
    return Store->getPointerOperand();
  if (MemIntrinsic *MemI = dyn_cast<MemIntrinsic>(I))
    return MemI->getDest();

  // Anything else must be one of the few other memory-writing intrinsics.
  IntrinsicInst *Intr = cast<IntrinsicInst>(I);
  switch (Intr->getIntrinsicID()) {
  case Intrinsic::init_trampoline:
    // init.trampoline writes through its first argument.
    return Intr->getArgOperand(0);
  default:
    llvm_unreachable("Unexpected intrinsic!");
  }
}
Example #8
0
/// isShortenable - Returns true if this instruction can be safely shortened in
/// length.
static bool isShortenable(Instruction *I) {
  // Don't shorten stores for now
  if (isa<StoreInst>(I))
    return false;

  // Of the remaining memory-writing intrinsics, only memset and memcpy are
  // known to tolerate a reduced length.
  IntrinsicInst *Intr = cast<IntrinsicInst>(I);
  Intrinsic::ID ID = Intr->getIntrinsicID();
  return ID == Intrinsic::memset || ID == Intrinsic::memcpy;
}
Example #9
0
/// Return true if the lifetime markers of the alloca that \p Addr offsets
/// into can be shrink-wrapped around the extraction region: no instruction
/// outside the extracted blocks may access that alloca.
bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
    Instruction *Addr) const {
  AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets());
  Function *Func = (*Blocks.begin())->getParent();
  for (BasicBlock &BB : *Func) {
    // Blocks inside the extraction region are allowed to touch AI.
    if (Blocks.count(&BB))
      continue;
    for (Instruction &II : BB) {

      // Debug intrinsics do not constitute a real use.
      if (isa<DbgInfoIntrinsic>(II))
        continue;

      unsigned Opcode = II.getOpcode();
      Value *MemAddr = nullptr;
      switch (Opcode) {
      case Instruction::Store:
      case Instruction::Load: {
        if (Opcode == Instruction::Store) {
          StoreInst *SI = cast<StoreInst>(&II);
          MemAddr = SI->getPointerOperand();
        } else {
          LoadInst *LI = cast<LoadInst>(&II);
          MemAddr = LI->getPointerOperand();
        }
        // Global variable can not be aliased with locals.
        // (isa<> instead of dyn_cast<>: the result was only used as a test.)
        if (isa<Constant>(MemAddr))
          break;
        Value *Base = MemAddr->stripInBoundsConstantOffsets();
        // Conservatively reject any access whose base is not a *different*
        // alloca — it might alias AI.
        if (!isa<AllocaInst>(Base) || Base == AI)
          return false;
        break;
      }
      default: {
        IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
        if (IntrInst) {
          // Other lifetime markers are harmless; every other intrinsic is
          // treated as a potential use of AI.
          if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
              IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
            break;
          return false;
        }
        // Treat all the other cases conservatively if it has side effects.
        if (II.mayHaveSideEffects())
          return false;
      }
      }
    }
  }

  return true;
}
Example #10
0
/// Dispatch intrinsic visitation: only bitreverse gets special handling;
/// everything else is left untouched.
bool AMDGPUCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
  if (I.getIntrinsicID() == Intrinsic::bitreverse)
    return visitBitreverseIntrinsicInst(I);
  return false;
}
Example #11
0
/// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
void DwarfEHPrepare::
FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
                        SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels) {
  // Walk every use of the eh.selector declaration.
  for (Value::use_iterator UI = SelectorIntrinsic->use_begin(),
         UE = SelectorIntrinsic->use_end(); UI != UE; ++UI) {
    IntrinsicInst *Sel = cast<IntrinsicInst>(*UI);

    // Only selectors inside the current function are of interest.
    if (Sel->getParent()->getParent() != F)
      continue;

    // Partition into catch-all selectors and plain clean-up selectors.
    if (HasCatchAllInSelector(Sel))
      CatchAllSels.insert(Sel);
    else
      Sels.insert(Sel);
  }
}
Example #12
0
// TODO: Ideally  we should share Inliner's InlineCost Analysis code.
// For now use a simplified version. The returned 'InlineCost' will be used
// to esimate the size cost as well as runtime cost of the BB.
int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
  int InlineCost = 0;
  const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
    // Debug intrinsics are free.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    switch (I->getOpcode()) {
    case Instruction::BitCast:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::Alloca:
      // These typically lower to no machine code; count them as free.
      continue;
    case Instruction::GetElementPtr:
      if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
        continue;
      // Non-trivial GEPs deliberately fall through to be charged below.
    default:
      break;
    }

    // Lifetime markers are bookkeeping only; they cost nothing.
    IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I);
    if (IntrInst) {
      if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
          IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
        continue;
    }

    // Calls and invokes use the shared call-site cost heuristic.
    if (CallInst *CI = dyn_cast<CallInst>(I)) {
      InlineCost += getCallsiteCost(CallSite(CI), DL);
      continue;
    }

    if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
      InlineCost += getCallsiteCost(CallSite(II), DL);
      continue;
    }

    // A switch is charged roughly one instruction per case plus the default.
    if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
      InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
      continue;
    }
    // Everything else gets a flat per-instruction cost.
    InlineCost += InlineConstants::InstrCost;
  }
  return InlineCost;
}
Example #13
0
/// Promote a narrow bitreverse to i32 when the subtarget has 16-bit
/// instructions, the type needs promotion, and the value is uniform.
bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
  const bool ShouldPromote = ST->has16BitInsts() &&
                             needsPromotionToI32(I.getType()) &&
                             DA->isUniform(&I);
  if (!ShouldPromote)
    return false;

  return promoteUniformBitreverseToI32(I);
}
Example #14
0
/// \brief Split sadd.with.overflow into add + sadd.with.overflow to allow
/// analysis and optimization.
///
/// \return A new value representing the non-overflowing add if possible,
/// otherwise return the original value.
Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser,
                                                    const DominatorTree *DT) {
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(IVUser);
  if (!II || II->getIntrinsicID() != Intrinsic::sadd_with_overflow)
    return IVUser;

  // Find a branch guarded by the overflow check.
  BranchInst *Branch = 0;
  Instruction *AddVal = 0;
  for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
       UI != E; ++UI) {
    if (ExtractValueInst *ExtractInst = dyn_cast<ExtractValueInst>(*UI)) {
      if (ExtractInst->getNumIndices() != 1)
        continue;
      // Index 0 extracts the arithmetic result; index 1 extracts the
      // overflow bit feeding the guarding branch.
      if (ExtractInst->getIndices()[0] == 0)
        AddVal = ExtractInst;
      else if (ExtractInst->getIndices()[0] == 1 && ExtractInst->hasOneUse())
        Branch = dyn_cast<BranchInst>(ExtractInst->use_back());
    }
  }
  if (!AddVal || !Branch)
    return IVUser;

  // Successor 1 is the no-overflow path. It must have the branch as its only
  // predecessor, so that reaching it proves the add did not overflow.
  BasicBlock *ContinueBB = Branch->getSuccessor(1);
  if (llvm::next(pred_begin(ContinueBB)) != pred_end(ContinueBB))
    return IVUser;

  // Check if all users of the add are provably NSW.
  bool AllNSW = true;
  for (Value::use_iterator UI = AddVal->use_begin(), E = AddVal->use_end();
       UI != E; ++UI) {
    if (Instruction *UseInst = dyn_cast<Instruction>(*UI)) {
      BasicBlock *UseBB = UseInst->getParent();
      // For a PHI the relevant location is the incoming edge, not the PHI's
      // own block.
      if (PHINode *PHI = dyn_cast<PHINode>(UseInst))
        UseBB = PHI->getIncomingBlock(UI);
      if (!DT->dominates(ContinueBB, UseBB)) {
        AllNSW = false;
        break;
      }
    }
  }
  if (!AllNSW)
    return IVUser;

  // Go for it...
  IRBuilder<> Builder(IVUser);
  Instruction *AddInst = dyn_cast<Instruction>(
    Builder.CreateNSWAdd(II->getOperand(0), II->getOperand(1)));

  // The caller expects the new add to have the same form as the intrinsic. The
  // IV operand position must be the same.
  assert((AddInst->getOpcode() == Instruction::Add &&
          AddInst->getOperand(0) == II->getOperand(0)) &&
         "Bad add instruction created from overflow intrinsic.");

  AddVal->replaceAllUsesWith(AddInst);
  DeadInsts.push_back(AddVal);
  return AddInst;
}
Example #15
0
/// Return true if \p CI is one of the fixed set of intrinsics that the
/// promotion pass knows how to rewrite; all other calls block promotion.
static bool isCallPromotable(CallInst *CI) {
  IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(CI);
  if (!Intr)
    return false;

  switch (Intr->getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset:
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::invariant_group_barrier:
  case Intrinsic::objectsize:
    return true;
  }
}
/// Scalarize a masked memory intrinsic when the target cannot lower it
/// legally; legal ones are left for the backend. Sets \p ModifiedDT and
/// returns true when the call was scalarized.
bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
                                                bool &ModifiedDT) {
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (!II)
    return false;

  switch (II->getIntrinsicID()) {
  case Intrinsic::masked_load:
    // Scalarize unsupported vector masked load
    if (TTI->isLegalMaskedLoad(CI->getType()))
      return false;
    scalarizeMaskedLoad(CI);
    break;
  case Intrinsic::masked_store:
    if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType()))
      return false;
    scalarizeMaskedStore(CI);
    break;
  case Intrinsic::masked_gather:
    if (TTI->isLegalMaskedGather(CI->getType()))
      return false;
    scalarizeMaskedGather(CI);
    break;
  case Intrinsic::masked_scatter:
    if (TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType()))
      return false;
    scalarizeMaskedScatter(CI);
    break;
  default:
    return false;
  }

  // One of the scalarize* helpers ran and rewrote the CFG.
  ModifiedDT = true;
  return true;
}
Example #17
0
/// isRemovable - If the value of this instruction and the memory it writes to
/// is unused, may we delete this instruction?
static bool isRemovable(Instruction *I) {
  // Don't remove volatile/atomic stores.
  if (StoreInst *Store = dyn_cast<StoreInst>(I))
    return Store->isUnordered();

  IntrinsicInst *Intr = cast<IntrinsicInst>(I);
  switch (Intr->getIntrinsicID()) {
  case Intrinsic::lifetime_end:
    // Never remove dead lifetime_end's, e.g. because it is followed by a
    // free.
    return false;
  case Intrinsic::init_trampoline:
    // Always safe to remove init_trampoline.
    return true;
  case Intrinsic::memset:
  case Intrinsic::memmove:
  case Intrinsic::memcpy:
    // Don't remove volatile memory intrinsics.
    return !cast<MemIntrinsic>(Intr)->isVolatile();
  default:
    llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate");
  }
}
Example #18
0
/// getLocForWrite - Return a Location stored to by the specified instruction.
/// If isRemovable returns true, this function and getLocForRead completely
/// describe the memory operations for this instruction.
static AliasAnalysis::Location
getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
  const DataLayout *DL = AA.getDataLayout();
  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
    return AA.getLocation(SI);

  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
    // memcpy/memmove/memset.
    AliasAnalysis::Location Loc = AA.getLocationForDest(MI);
    // If we don't have target data around, an unknown size in Location means
    // that we should use the size of the pointee type.  This isn't valid for
    // memset/memcpy, which writes more than an i8.
    if (Loc.Size == AliasAnalysis::UnknownSize && DL == nullptr)
      return AliasAnalysis::Location();
    return Loc;
  }

  // Non-intrinsic instructions have no modeled write location here.
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
  if (!II) return AliasAnalysis::Location();

  switch (II->getIntrinsicID()) {
  default: return AliasAnalysis::Location(); // Unhandled intrinsic.
  case Intrinsic::init_trampoline:
    // If we don't have target data around, an unknown size in Location means
    // that we should use the size of the pointee type.  This isn't valid for
    // init.trampoline, which writes more than an i8.
    if (!DL) return AliasAnalysis::Location();

    // FIXME: We don't know the size of the trampoline, so we can't really
    // handle it here.
    return AliasAnalysis::Location(II->getArgOperand(0));
  case Intrinsic::lifetime_end: {
    // lifetime_end's first argument is the length, second is the pointer.
    uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
    return AliasAnalysis::Location(II->getArgOperand(1), Len);
  }
  }
}
Example #19
0
/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use
/// the "llvm.eh.catch.all.value" call need to convert to using its
/// initializer instead.
bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) {
  // Nothing to rewrite without the catch-all value.
  if (!EHCatchAllValue) return false;

  // Lazily look up (and cache) the eh.selector declaration.
  if (!SelectorIntrinsic) {
    SelectorIntrinsic =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
    if (!SelectorIntrinsic) return false;
  }

  bool MadeChange = false;
  for (SmallPtrSet<IntrinsicInst*, 32>::iterator
         SI = Sels.begin(), SE = Sels.end(); SI != SE; ++SI) {
    IntrinsicInst *Sel = *SI;

    // Index of the "llvm.eh.catch.all.value" variable: always the selector's
    // last argument.
    unsigned OpIdx = Sel->getNumArgOperands() - 1;
    GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getArgOperand(OpIdx));
    if (GV != EHCatchAllValue) continue;
    // Swap the global reference for its initializer.
    Sel->setArgOperand(OpIdx, EHCatchAllValue->getInitializer());
    MadeChange = true;
  }

  return MadeChange;
}
Example #20
0
/// Lower or remove intrinsic calls in block \p b that the interpreter cannot
/// execute directly: vacopy, the *.with.overflow family, dbg.value/declare,
/// trap, and objectsize. Everything else is optionally handed to
/// IntrinsicLowering. Returns true if the block was modified.
///
/// Fix: the vacopy case removed/deleted the intrinsic without setting
/// 'dirty', so the pass could report "no change" after mutating the IR.
bool IntrinsicCleanerPass::runOnBasicBlock(BasicBlock &b, Module &M) {
  bool dirty = false;
  bool block_split=false;

#if LLVM_VERSION_CODE <= LLVM_VERSION(3, 1)
  unsigned WordSize = TargetData.getPointerSizeInBits() / 8;
#else
  unsigned WordSize = DataLayout.getPointerSizeInBits() / 8;
#endif
  // NOTE(review): block_split is never set in this function, so the loop
  // condition is effectively (i != ie) — confirm whether a split was intended.
  for (BasicBlock::iterator i = b.begin(), ie = b.end();
       (i != ie) && (block_split == false);) {
    IntrinsicInst *ii = dyn_cast<IntrinsicInst>(&*i);
    // increment now since LowerIntrinsic deletion makes iterator invalid.
    ++i;
    if(ii) {
      switch (ii->getIntrinsicID()) {
      case Intrinsic::vastart:
      case Intrinsic::vaend:
        break;

        // Lower vacopy so that object resolution etc is handled by
        // normal instructions.
        //
        // FIXME: This is much more target dependent than just the word size,
        // however this works for x86-32 and x86-64.
      case Intrinsic::vacopy: { // (dst, src) -> *((i8**) dst) = *((i8**) src)
        Value *dst = ii->getArgOperand(0);
        Value *src = ii->getArgOperand(1);

        if (WordSize == 4) {
          // 32-bit: a va_list is a single pointer-sized slot; copy it.
          Type *i8pp = PointerType::getUnqual(PointerType::getUnqual(Type::getInt8Ty(getGlobalContext())));
          Value *castedDst = CastInst::CreatePointerCast(dst, i8pp, "vacopy.cast.dst", ii);
          Value *castedSrc = CastInst::CreatePointerCast(src, i8pp, "vacopy.cast.src", ii);
          Value *load = new LoadInst(castedSrc, "vacopy.read", ii);
          new StoreInst(load, castedDst, false, ii);
        } else {
          // 64-bit: copy the three i64 slots of the x86-64 va_list struct.
          assert(WordSize == 8 && "Invalid word size!");
          Type *i64p = PointerType::getUnqual(Type::getInt64Ty(getGlobalContext()));
          Value *pDst = CastInst::CreatePointerCast(dst, i64p, "vacopy.cast.dst", ii);
          Value *pSrc = CastInst::CreatePointerCast(src, i64p, "vacopy.cast.src", ii);
          Value *val = new LoadInst(pSrc, std::string(), ii); new StoreInst(val, pDst, ii);
          Value *off = ConstantInt::get(Type::getInt64Ty(getGlobalContext()), 1);
          pDst = GetElementPtrInst::Create(pDst, off, std::string(), ii);
          pSrc = GetElementPtrInst::Create(pSrc, off, std::string(), ii);
          val = new LoadInst(pSrc, std::string(), ii); new StoreInst(val, pDst, ii);
          pDst = GetElementPtrInst::Create(pDst, off, std::string(), ii);
          pSrc = GetElementPtrInst::Create(pSrc, off, std::string(), ii);
          val = new LoadInst(pSrc, std::string(), ii); new StoreInst(val, pDst, ii);
        }
        // vacopy returns void, so the call has no uses; unlink and delete it.
        ii->removeFromParent();
        delete ii;
        // Fix: the block was modified above — report it.
        dirty = true;
        break;
      }

      case Intrinsic::sadd_with_overflow:
      case Intrinsic::ssub_with_overflow:
      case Intrinsic::smul_with_overflow:
      case Intrinsic::uadd_with_overflow:
      case Intrinsic::usub_with_overflow:
      case Intrinsic::umul_with_overflow: {
        IRBuilder<> builder(ii->getParent(), ii);

        Value *op1 = ii->getArgOperand(0);
        Value *op2 = ii->getArgOperand(1);

        Value *result = 0;
        Value *result_ext = 0;
        Value *overflow = 0;

        // Compute in double width so the overflow test is a simple compare.
        unsigned int bw = op1->getType()->getPrimitiveSizeInBits();
        unsigned int bw2 = op1->getType()->getPrimitiveSizeInBits()*2;

        if ((ii->getIntrinsicID() == Intrinsic::uadd_with_overflow) ||
            (ii->getIntrinsicID() == Intrinsic::usub_with_overflow) ||
            (ii->getIntrinsicID() == Intrinsic::umul_with_overflow)) {

          Value *op1ext =
            builder.CreateZExt(op1, IntegerType::get(M.getContext(), bw2));
          Value *op2ext =
            builder.CreateZExt(op2, IntegerType::get(M.getContext(), bw2));
          Value *int_max_s =
            ConstantInt::get(op1->getType(), APInt::getMaxValue(bw));
          Value *int_max =
            builder.CreateZExt(int_max_s, IntegerType::get(M.getContext(), bw2));

          if (ii->getIntrinsicID() == Intrinsic::uadd_with_overflow){
            result_ext = builder.CreateAdd(op1ext, op2ext);
          } else if (ii->getIntrinsicID() == Intrinsic::usub_with_overflow){
            result_ext = builder.CreateSub(op1ext, op2ext);
          } else if (ii->getIntrinsicID() == Intrinsic::umul_with_overflow){
            result_ext = builder.CreateMul(op1ext, op2ext);
          }
          // Unsigned overflow iff the wide result exceeds the narrow maximum.
          overflow = builder.CreateICmpUGT(result_ext, int_max);

        } else if ((ii->getIntrinsicID() == Intrinsic::sadd_with_overflow) ||
                   (ii->getIntrinsicID() == Intrinsic::ssub_with_overflow) ||
                   (ii->getIntrinsicID() == Intrinsic::smul_with_overflow)) {

          Value *op1ext =
            builder.CreateSExt(op1, IntegerType::get(M.getContext(), bw2));
          Value *op2ext =
            builder.CreateSExt(op2, IntegerType::get(M.getContext(), bw2));
          Value *int_max_s =
            ConstantInt::get(op1->getType(), APInt::getSignedMaxValue(bw));
          Value *int_min_s =
            ConstantInt::get(op1->getType(), APInt::getSignedMinValue(bw));
          Value *int_max =
            builder.CreateSExt(int_max_s, IntegerType::get(M.getContext(), bw2));
          Value *int_min =
            builder.CreateSExt(int_min_s, IntegerType::get(M.getContext(), bw2));

          if (ii->getIntrinsicID() == Intrinsic::sadd_with_overflow){
            result_ext = builder.CreateAdd(op1ext, op2ext);
          } else if (ii->getIntrinsicID() == Intrinsic::ssub_with_overflow){
            result_ext = builder.CreateSub(op1ext, op2ext);
          } else if (ii->getIntrinsicID() == Intrinsic::smul_with_overflow){
            result_ext = builder.CreateMul(op1ext, op2ext);
          }
          // Signed overflow iff the wide result leaves [int_min, int_max].
          overflow = builder.CreateOr(builder.CreateICmpSGT(result_ext, int_max),
                                      builder.CreateICmpSLT(result_ext, int_min));
        }

        // This trunc could be replaced by a more general trunc replacement
        // that allows to detect also undefined behavior in assignments or
        // overflow in operation with integers whose dimension is smaller than
        // int's dimension, e.g.
        //     uint8_t = uint8_t + uint8_t;
        // if one desires the wrapping should write
        //     uint8_t = (uint8_t + uint8_t) & 0xFF;
        // before this, must check if it has side effects on other operations
        result = builder.CreateTrunc(result_ext, op1->getType());
        // Rebuild the {result, overflow-bit} struct the intrinsic returned.
        Value *resultStruct =
          builder.CreateInsertValue(UndefValue::get(ii->getType()), result, 0);
        resultStruct = builder.CreateInsertValue(resultStruct, overflow, 1);

        ii->replaceAllUsesWith(resultStruct);
        ii->removeFromParent();
        delete ii;
        dirty = true;
        break;
      }

      case Intrinsic::dbg_value:
      case Intrinsic::dbg_declare:
        // Remove these regardless of lower intrinsics flag. This can
        // be removed once IntrinsicLowering is fixed to not have bad
        // caches.
        ii->eraseFromParent();
        dirty = true;
        break;

      case Intrinsic::trap: {
        // Intrinsic instruction "llvm.trap" found. Directly lower it to
        // a call of the abort() function.
        Function *F = cast<Function>(
          M.getOrInsertFunction(
            "abort", Type::getVoidTy(getGlobalContext()), NULL));
        F->setDoesNotReturn();
        F->setDoesNotThrow();

        CallInst::Create(F, Twine(), ii);
        new UnreachableInst(getGlobalContext(), ii);

        ii->eraseFromParent();

        dirty = true;
        break;
      }
      case Intrinsic::objectsize: {
        // We don't know the size of an object in general so we replace
        // with 0 or -1 depending on the second argument to the intrinsic.
        assert(ii->getNumArgOperands() == 2 && "wrong number of arguments");
        Value *minArg = ii->getArgOperand(1);
        assert(minArg && "Failed to get second argument");
        ConstantInt *minArgAsInt = dyn_cast<ConstantInt>(minArg);
        assert(minArgAsInt && "Second arg is not a ConstantInt");
        assert(minArgAsInt->getBitWidth() == 1 && "Second argument is not an i1");
        Value *replacement = NULL;
        LLVM_TYPE_Q IntegerType *intType = dyn_cast<IntegerType>(ii->getType());
        assert(intType && "intrinsic does not have integer return type");
        if (minArgAsInt->isZero()) {
          // min=false
          replacement = ConstantInt::get(intType, -1, /*isSigned=*/true);
        } else {
          // min=true
          replacement = ConstantInt::get(intType, 0, /*isSigned=*/false);
        }
        ii->replaceAllUsesWith(replacement);
        ii->eraseFromParent();
        dirty = true;
        break;
      }
      default:
        if (LowerIntrinsics)
          IL->LowerIntrinsicCall(ii);
        dirty = true;
        break;
      }
    }
  }

  return dirty;
}
// Attempt to promote a private (scratch) alloca.  First try to turn it into a
// vector; failing that, replace it with a per-workgroup LDS (local address
// space) array indexed by the flattened workitem id, then rewrite every
// collected pointer-typed use to the new address space.  The alloca is erased
// on successful LDS promotion.
void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector
  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I))
    return;

  DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");

  // FIXME: This is the maximum work group size.  We should try to get
  // value from the reqd_work_group_size function attribute if it is
  // available.
  unsigned WorkGroupSize = 256;
  // Each workitem gets its own copy of the alloca, so the LDS footprint is
  // the per-item size scaled by the workgroup size.
  int AllocaSize = WorkGroupSize *
      Mod->getDataLayout()->getTypeAllocSize(AllocaTy);

  if (AllocaSize > LocalMemAvailable) {
    DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
    return;
  }

  std::vector<Value*> WorkList;

  if (!collectUsesWithPtrTypes(&I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");
  LocalMemAvailable -= AllocaSize;

  // One array element per workitem.  Use WorkGroupSize instead of repeating
  // the magic constant so the array size always matches the size accounting
  // performed above.
  GlobalVariable *GV = new GlobalVariable(
      *Mod, ArrayType::get(I.getAllocatedType(), WorkGroupSize), false,
      GlobalValue::ExternalLinkage, 0, I.getName(), 0,
      GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);

  FunctionType *FTy = FunctionType::get(
      Type::getInt32Ty(Mod->getContext()), false);
  AttributeSet AttrSet;
  // AttributeSet::addAttribute is const and returns a new set; the original
  // code discarded the result, leaving AttrSet empty.  Capture the returned
  // set so the intrinsic declarations really carry ReadNone.
  AttrSet = AttrSet.addAttribute(Mod->getContext(), 0, Attribute::ReadNone);

  // Declarations for the workgroup-size and workitem-id intrinsics used to
  // compute this lane's slot in the LDS array.
  Value *ReadLocalSizeY = Mod->getOrInsertFunction(
      "llvm.r600.read.local.size.y", FTy, AttrSet);
  Value *ReadLocalSizeZ = Mod->getOrInsertFunction(
      "llvm.r600.read.local.size.z", FTy, AttrSet);
  Value *ReadTIDIGX = Mod->getOrInsertFunction(
      "llvm.r600.read.tidig.x", FTy, AttrSet);
  Value *ReadTIDIGY = Mod->getOrInsertFunction(
      "llvm.r600.read.tidig.y", FTy, AttrSet);
  Value *ReadTIDIGZ = Mod->getOrInsertFunction(
      "llvm.r600.read.tidig.z", FTy, AttrSet);


  Value *TCntY = Builder.CreateCall(ReadLocalSizeY);
  Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ);
  Value *TIdX  = Builder.CreateCall(ReadTIDIGX);
  Value *TIdY  = Builder.CreateCall(ReadTIDIGY);
  Value *TIdZ  = Builder.CreateCall(ReadTIDIGZ);

  // Flatten the 3-D workitem id:  TID = tid.x * cnt.y * cnt.z
  //                                   + tid.y * cnt.z
  //                                   + tid.z
  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  std::vector<Value*> Indices;
  Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext())));
  Indices.push_back(TID);

  // Replace the alloca with this lane's element of the LDS array.  The
  // mutateType is needed because the replacement pointer lives in a
  // different address space.
  Value *Offset = Builder.CreateGEP(GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  // Fix up every recorded use whose pointer type must move to LOCAL_ADDRESS.
  for (std::vector<Value*>::iterator i = WorkList.begin(),
                                     e = WorkList.end(); i != e; ++i) {
    Value *V = *i;
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);

      // The operand's value should be corrected on its own.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);
      continue;
    }

    IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
    if (!Intr) {
      // Non-intrinsic call: redirect it to a ".local" clone of the callee
      // whose signature reflects the retyped pointer arguments.
      std::vector<Type*> ArgTypes;
      for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
                                ArgIdx != ArgEnd; ++ArgIdx) {
        ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
      }
      Function *F = Call->getCalledFunction();
      FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
                                                F->isVarArg());
      Constant *C = Mod->getOrInsertFunction(StringRef(F->getName().str() + ".local"), NewType,
                                             F->getAttributes());
      Function *NewF = cast<Function>(C);
      Call->setCalledFunction(NewF);
      continue;
    }

    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy: {
      // Recreate the memcpy so the intrinsic is re-selected for the new
      // pointer address spaces.
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
                           MemCpy->getLength(), MemCpy->getAlignment(),
                           MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getAlignment(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->dump();
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
}
示例#22
0
// Scan allocas declared *outside* the extraction region and decide which of
// them (together with their lifetime markers) may be sunk into the outlined
// function.  Sink candidates are appended to SinkCands, lifetime.end markers
// that must be hoisted go into HoistCands, and ExitBlock is set to the common
// exit block of the region (the hoist target).
void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
                                BasicBlock *&ExitBlock) const {
  Function *Func = (*Blocks.begin())->getParent();
  ExitBlock = getCommonExitBlock(Blocks);

  // Only blocks outside the extraction region are of interest here; allocas
  // inside the region move with it automatically.
  for (BasicBlock &BB : *Func) {
    if (Blocks.count(&BB))
      continue;
    for (Instruction &II : BB) {
      auto *AI = dyn_cast<AllocaInst>(&II);
      if (!AI)
        continue;

      // Find the pair of life time markers for address 'Addr' that are either
      // defined inside the outline region or can legally be shrinkwrapped into
      // the outline region. If there are not other untracked uses of the
      // address, return the pair of markers if found; otherwise return a pair
      // of nullptr.
      auto GetLifeTimeMarkers =
          [&](Instruction *Addr, bool &SinkLifeStart,
              bool &HoistLifeEnd) -> std::pair<Instruction *, Instruction *> {
        Instruction *LifeStart = nullptr, *LifeEnd = nullptr;

        for (User *U : Addr->users()) {
          IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
          if (IntrInst) {
            if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) {
              // Do not handle the case where AI has multiple start markers.
              if (LifeStart)
                return std::make_pair<Instruction *>(nullptr, nullptr);
              LifeStart = IntrInst;
            }
            if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) {
              if (LifeEnd)
                return std::make_pair<Instruction *>(nullptr, nullptr);
              LifeEnd = IntrInst;
            }
            continue;
          }
          // Find untracked uses of the address, bail.
          if (!definedInRegion(Blocks, U))
            return std::make_pair<Instruction *>(nullptr, nullptr);
        }

        if (!LifeStart || !LifeEnd)
          return std::make_pair<Instruction *>(nullptr, nullptr);

        // A marker defined outside the region must be moved in
        // (start is sunk, end is hoisted) for the sink to be valid.
        SinkLifeStart = !definedInRegion(Blocks, LifeStart);
        HoistLifeEnd = !definedInRegion(Blocks, LifeEnd);
        // Do legality Check.
        if ((SinkLifeStart || HoistLifeEnd) &&
            !isLegalToShrinkwrapLifetimeMarkers(Addr))
          return std::make_pair<Instruction *>(nullptr, nullptr);

        // Check to see if we have a place to do hoisting, if not, bail.
        if (HoistLifeEnd && !ExitBlock)
          return std::make_pair<Instruction *>(nullptr, nullptr);

        return std::make_pair(LifeStart, LifeEnd);
      };

      bool SinkLifeStart = false, HoistLifeEnd = false;
      auto Markers = GetLifeTimeMarkers(AI, SinkLifeStart, HoistLifeEnd);

      // Direct markers on the alloca itself: record the candidates and move on.
      if (Markers.first) {
        if (SinkLifeStart)
          SinkCands.insert(Markers.first);
        SinkCands.insert(AI);
        if (HoistLifeEnd)
          HoistCands.insert(Markers.second);
        continue;
      }

      // Follow the bitcast.
      Instruction *MarkerAddr = nullptr;
      for (User *U : AI->users()) {

        // stripInBoundsConstantOffsets() folding back to AI means U is a
        // cast/GEP of the alloca; check it for markers.
        if (U->stripInBoundsConstantOffsets() == AI) {
          SinkLifeStart = false;
          HoistLifeEnd = false;
          Instruction *Bitcast = cast<Instruction>(U);
          Markers = GetLifeTimeMarkers(Bitcast, SinkLifeStart, HoistLifeEnd);
          if (Markers.first) {
            MarkerAddr = Bitcast;
            continue;
          }
        }

        // Found unknown use of AI.
        if (!definedInRegion(Blocks, U)) {
          MarkerAddr = nullptr;
          break;
        }
      }

      // Markers were found through a cast of the alloca; both the alloca and
      // (if outside the region) the cast itself must be sunk.
      if (MarkerAddr) {
        if (SinkLifeStart)
          SinkCands.insert(Markers.first);
        if (!definedInRegion(Blocks, MarkerAddr))
          SinkCands.insert(MarkerAddr);
        SinkCands.insert(AI);
        if (HoistLifeEnd)
          HoistCands.insert(Markers.second);
      }
    }
  }
}
示例#23
0
/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" calls. The
/// "unwind" part of these invokes jump to a landing pad within the current
/// function. This is a candidate to merge the selector associated with the URoR
/// invoke with the one from the URoR's landing pad.
///
/// Returns true if any IR was changed.  Clean-up eh.selector calls whose
/// landing pads can be reached by a URoR invoke are rewritten into catch-all
/// selectors; all remaining catch-all clean-up selectors are then cleaned up.
bool DwarfEHPrepare::HandleURoRInvokes() {
  // Lazily resolve the module-level entities we depend on; if any is missing
  // there is nothing to do (beyond cleaning up already-found selectors).
  if (!EHCatchAllValue) {
    EHCatchAllValue =
      F->getParent()->getNamedGlobal("llvm.eh.catch.all.value");
    if (!EHCatchAllValue) return false;
  }

  if (!SelectorIntrinsic) {
    SelectorIntrinsic =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
    if (!SelectorIntrinsic) return false;
  }

  SmallPtrSet<IntrinsicInst*, 32> Sels;
  SmallPtrSet<IntrinsicInst*, 32> CatchAllSels;
  FindAllCleanupSelectors(Sels, CatchAllSels);

  if (!DT)
    // We require DominatorTree information.
    return CleanupSelectors(CatchAllSels);

  if (!URoR) {
    URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow");
    if (!URoR) return CleanupSelectors(CatchAllSels);
  }

  SmallPtrSet<InvokeInst*, 32> URoRInvokes;
  FindAllURoRInvokes(URoRInvokes);

  SmallPtrSet<IntrinsicInst*, 32> SelsToConvert;

  // Fast path: a clean-up selector whose block dominates a URoR invoke must
  // be converted to a catch-all selector.
  for (SmallPtrSet<IntrinsicInst*, 32>::iterator
         SI = Sels.begin(), SE = Sels.end(); SI != SE; ++SI) {
    const BasicBlock *SelBB = (*SI)->getParent();
    for (SmallPtrSet<InvokeInst*, 32>::iterator
           UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) {
      const BasicBlock *URoRBB = (*UI)->getParent();
      if (DT->dominates(SelBB, URoRBB)) {
        SelsToConvert.insert(*SI);
        break;
      }
    }
  }

  bool Changed = false;

  if (Sels.size() != SelsToConvert.size()) {
    // If we haven't been able to convert all of the clean-up selectors, then
    // loop through the slow way to see if they still need to be converted.
    if (!ExceptionValueIntrinsic) {
      ExceptionValueIntrinsic =
        Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception);
      if (!ExceptionValueIntrinsic)
        return CleanupSelectors(CatchAllSels);
    }

    // Walk every use of eh.exception in this function and chase the EH
    // pointer through to selectors reached by a URoR invoke.
    for (Value::use_iterator
           I = ExceptionValueIntrinsic->use_begin(),
           E = ExceptionValueIntrinsic->use_end(); I != E; ++I) {
      IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(*I);
      if (!EHPtr || EHPtr->getParent()->getParent() != F) continue;

      Changed |= PromoteEHPtrStore(EHPtr);

      bool URoRInvoke = false;
      SmallPtrSet<IntrinsicInst*, 8> SelCalls;
      Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls);

      if (URoRInvoke) {
        // This EH pointer is being used by an invoke of an URoR instruction and
        // an eh.selector intrinsic call. If the eh.selector is a 'clean-up', we
        // need to convert it to a 'catch-all'.
        for (SmallPtrSet<IntrinsicInst*, 8>::iterator
               SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI)
          if (!HasCatchAllInSelector(*SI))
              SelsToConvert.insert(*SI);
      }
    }
  }

  if (!SelsToConvert.empty()) {
    // Convert all clean-up eh.selectors, which are associated with "invokes" of
    // URoR calls, into catch-all eh.selectors.
    Changed = true;

    for (SmallPtrSet<IntrinsicInst*, 8>::iterator
           SI = SelsToConvert.begin(), SE = SelsToConvert.end();
         SI != SE; ++SI) {
      IntrinsicInst *II = *SI;

      // Use the exception object pointer and the personality function
      // from the original selector.
      CallSite CS(II);
      IntrinsicInst::op_iterator I = CS.arg_begin();
      IntrinsicInst::op_iterator E = CS.arg_end();
      IntrinsicInst::op_iterator B = prior(E);

      // Exclude last argument if it is an integer.
      if (isa<ConstantInt>(B)) E = B;

      // Add exception object pointer (front).
      // Add personality function (next).
      // Add in any filter IDs (rest).
      SmallVector<Value*, 8> Args(I, E);

      Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator.

      CallInst *NewSelector =
        CallInst::Create(SelectorIntrinsic, Args.begin(), Args.end(),
                         "eh.sel.catch.all", II);

      // Preserve call attributes so the replacement is behaviorally
      // equivalent apart from the added catch-all argument.
      NewSelector->setTailCall(II->isTailCall());
      NewSelector->setAttributes(II->getAttributes());
      NewSelector->setCallingConv(II->getCallingConv());

      II->replaceAllUsesWith(NewSelector);
      II->eraseFromParent();
    }
  }

  Changed |= CleanupSelectors(CatchAllSels);
  return Changed;
}
示例#24
0
// FIXME: Should try to pick the most likely to be profitable allocas first.
//
// Try to promote a static, non-array alloca: first to a vector; failing that,
// to a per-workgroup LDS array indexed by the flattened workitem id (kernel
// calling conventions only).  On LDS promotion the alloca is erased and all
// collected pointer uses are rewritten into the local address space.
// Returns true if the alloca was promoted either way.
bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
  // Array allocations are probably not worth handling, since an allocation of
  // the array type is the canonical form.
  if (!I.isStaticAlloca() || I.isArrayAllocation())
    return false;

  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector
  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I, AS))
    return true; // Promoted to vector.

  const Function &ContainingFunction = *I.getParent()->getParent();
  CallingConv::ID CC = ContainingFunction.getCallingConv();

  // Don't promote the alloca to LDS for shader calling conventions as the work
  // item ID intrinsics are not supported for these calling conventions.
  // Furthermore not all LDS is available for some of the stages.
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    break;
  default:
    DEBUG(dbgs() << " promote alloca to LDS not supported with calling convention.\n");
    return false;
  }

  // Not likely to have sufficient local memory for promotion.
  if (!SufficientLDS)
    return false;

  const AMDGPUSubtarget &ST =
    TM->getSubtarget<AMDGPUSubtarget>(ContainingFunction);
  // Use the maximum flat workgroup size: every workitem needs its own slot.
  unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;

  const DataLayout &DL = Mod->getDataLayout();

  unsigned Align = I.getAlignment();
  if (Align == 0)
    Align = DL.getABITypeAlignment(I.getAllocatedType());

  // FIXME: This computed padding is likely wrong since it depends on inverse
  // usage order.
  //
  // FIXME: It is also possible that if we're allowed to use all of the memory
  // could could end up using more than the maximum due to alignment padding.

  uint32_t NewSize = alignTo(CurrentLocalMemUsage, Align);
  uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy);
  NewSize += AllocSize;

  if (NewSize > LocalMemLimit) {
    DEBUG(dbgs() << "  " << AllocSize
          << " bytes of local memory not available to promote\n");
    return false;
  }

  CurrentLocalMemUsage = NewSize;

  std::vector<Value*> WorkList;

  if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return false;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");

  Function *F = I.getParent()->getParent();

  // Per-workgroup backing store: one element of the alloca type per workitem.
  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage,
      UndefValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(),
      nullptr,
      GlobalVariable::NotThreadLocal,
      AS.LOCAL_ADDRESS);
  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  GV->setAlignment(I.getAlignment());

  Value *TCntY, *TCntZ;

  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
  Value *TIdX = getWorkitemID(Builder, 0);
  Value *TIdY = getWorkitemID(Builder, 1);
  Value *TIdZ = getWorkitemID(Builder, 2);

  // Flatten the 3-D workitem id:
  //   TID = tid.x * cnt.y * cnt.z + tid.y * cnt.z + tid.z
  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  Value *Indices[] = {
    Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
    TID
  };

  // Replace the alloca with this lane's slot in the LDS array; the pointer
  // changes address space, hence the mutateType before RAUW.
  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  // Rewrite every recorded pointer-typed use into the local address space.
  for (Value *V : WorkList) {
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      // Comparisons against null need the null constant retyped to match the
      // promoted pointer's address space.
      if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
        Value *Src0 = CI->getOperand(0);
        Type *EltTy = Src0->getType()->getPointerElementType();
        PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);

        if (isa<ConstantPointerNull>(CI->getOperand(0)))
          CI->setOperand(0, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(CI->getOperand(1)))
          CI->setOperand(1, ConstantPointerNull::get(NewTy));

        continue;
      }

      // The operand's value should be corrected on its own and we don't want to
      // touch the users.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);

      // Adjust the types of any constant operands.
      if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
        if (isa<ConstantPointerNull>(SI->getOperand(1)))
          SI->setOperand(1, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(SI->getOperand(2)))
          SI->setOperand(2, ConstantPointerNull::get(NewTy));
      } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
        for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
          if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
            Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy));
        }
      }

      continue;
    }

    IntrinsicInst *Intr = cast<IntrinsicInst>(Call);
    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy: {
      // Recreate the transfer so the intrinsic is re-selected for the new
      // pointer address spaces.
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getDestAlignment(),
                           MemCpy->getRawSource(), MemCpy->getSourceAlignment(),
                           MemCpy->getLength(), MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
      Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getDestAlignment(),
                            MemMove->getRawSource(), MemMove->getSourceAlignment(),
                            MemMove->getLength(), MemMove->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getDestAlignment(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::invariant_group_barrier:
      Intr->eraseFromParent();
      // FIXME: I think the invariant marker should still theoretically apply,
      // but the intrinsics need to be changed to accept pointers with any
      // address space.
      continue;
    case Intrinsic::objectsize: {
      // Re-declare objectsize for the new pointer type and forward the
      // original arguments.
      Value *Src = Intr->getOperand(0);
      Type *SrcTy = Src->getType()->getPointerElementType();
      Function *ObjectSize = Intrinsic::getDeclaration(Mod,
        Intrinsic::objectsize,
        { Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) }
      );

      CallInst *NewCall = Builder.CreateCall(
          ObjectSize, {Src, Intr->getOperand(1), Intr->getOperand(2)});
      Intr->replaceAllUsesWith(NewCall);
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->print(errs());
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
  return true;
}
示例#25
0
/// getModRefInfo - Check to see if the specified callsite can clobber the
/// specified memory object.  Since we only look at local properties of this
/// function, we really can't say much about this query.  We do, however, use
/// simple "address taken" analysis on local objects.
///
/// Note: intrinsic call operands here are accessed 1-based
/// (getOperand(1) is the first argument) — presumably operand 0 is the
/// callee in this LLVM vintage; confirm against the CallInst layout in use.
AliasAnalysis::ModRefResult
BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
  const Value *Object = P->getUnderlyingObject();
  
  // If this is a tail call and P points to a stack location, we know that
  // the tail call cannot access or modify the local stack.
  // We cannot exclude byval arguments here; these belong to the caller of
  // the current function not to the current function, and a tail callee
  // may reference them.
  if (isa<AllocaInst>(Object))
    if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
      if (CI->isTailCall())
        return NoModRef;
  
  // If the pointer is to a locally allocated object that does not escape,
  // then the call can not mod/ref the pointer unless the call takes the pointer
  // as an argument, and itself doesn't capture it.
  if (!isa<Constant>(Object) && CS.getInstruction() != Object &&
      isNonEscapingLocalObject(Object)) {
    bool PassedAsArg = false;
    unsigned ArgNo = 0;
    for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
         CI != CE; ++CI, ++ArgNo) {
      // Only look at the no-capture pointer arguments.
      if (!isa<PointerType>((*CI)->getType()) ||
          !CS.paramHasAttr(ArgNo+1, Attribute::NoCapture))
        continue;
      
      // If  this is a no-capture pointer argument, see if we can tell that it
      // is impossible to alias the pointer we're checking.  If not, we have to
      // assume that the call could touch the pointer, even though it doesn't
      // escape.
      if (!isNoAlias(cast<Value>(CI), ~0U, P, ~0U)) {
        PassedAsArg = true;
        break;
      }
    }
    
    if (!PassedAsArg)
      return NoModRef;
  }

  // Finally, handle specific knowledge of intrinsics.
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
  if (II == 0)
    return AliasAnalysis::getModRefInfo(CS, P, Size);

  switch (II->getIntrinsicID()) {
  default: break;
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    // If P can't alias the destination we at most read (Ref); if it can't
    // alias either operand the transfer doesn't touch P at all.
    unsigned Len = ~0U;
    if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getOperand(3)))
      Len = LenCI->getZExtValue();
    Value *Dest = II->getOperand(1);
    Value *Src = II->getOperand(2);
    if (isNoAlias(Dest, Len, P, Size)) {
      if (isNoAlias(Src, Len, P, Size))
        return NoModRef;
      return Ref;
    }
    break;
  }
  case Intrinsic::memset:
    // Since memset is 'accesses arguments' only, the AliasAnalysis base class
    // will handle it for the variable length case.
    if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getOperand(3))) {
      unsigned Len = LenCI->getZExtValue();
      Value *Dest = II->getOperand(1);
      if (isNoAlias(Dest, Len, P, Size))
        return NoModRef;
    }
    break;
  case Intrinsic::atomic_cmp_swap:
  case Intrinsic::atomic_swap:
  case Intrinsic::atomic_load_add:
  case Intrinsic::atomic_load_sub:
  case Intrinsic::atomic_load_and:
  case Intrinsic::atomic_load_nand:
  case Intrinsic::atomic_load_or:
  case Intrinsic::atomic_load_xor:
  case Intrinsic::atomic_load_max:
  case Intrinsic::atomic_load_min:
  case Intrinsic::atomic_load_umax:
  case Intrinsic::atomic_load_umin:
    // Atomics only touch their pointer operand; with target data we can size
    // the access and prove disjointness.
    if (TD) {
      Value *Op1 = II->getOperand(1);
      unsigned Op1Size = TD->getTypeStoreSize(Op1->getType());
      if (isNoAlias(Op1, Op1Size, P, Size))
        return NoModRef;
    }
    break;
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
  case Intrinsic::invariant_start: {
    // These markers take (size, pointer).
    unsigned PtrSize = cast<ConstantInt>(II->getOperand(1))->getZExtValue();
    if (isNoAlias(II->getOperand(2), PtrSize, P, Size))
      return NoModRef;
    break;
  }
  case Intrinsic::invariant_end: {
    // invariant_end takes (token, size, pointer).
    unsigned PtrSize = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
    if (isNoAlias(II->getOperand(3), PtrSize, P, Size))
      return NoModRef;
    break;
  }
  }

  // The AliasAnalysis base class has some smarts, lets use them.
  return AliasAnalysis::getModRefInfo(CS, P, Size);
}
示例#26
0
// Try to promote a static, non-array alloca: first to a vector; failing that,
// to a per-workgroup LDS (local address space) array indexed by the flattened
// workitem id.  On LDS promotion the alloca is erased and all collected
// pointer uses are rewritten into the local address space.
void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
  // Array allocations are probably not worth handling, since an allocation of
  // the array type is the canonical form.
  if (!I.isStaticAlloca() || I.isArrayAllocation())
    return;

  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector
  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I))
    return;

  DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");

  const Function &ContainingFunction = *I.getParent()->getParent();

  // FIXME: We should also try to get this value from the reqd_work_group_size
  // function attribute if it is available.
  unsigned WorkGroupSize = AMDGPU::getMaximumWorkGroupSize(ContainingFunction);

  // Every workitem gets a private copy, so LDS cost scales with group size.
  int AllocaSize =
      WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy);

  if (AllocaSize > LocalMemAvailable) {
    DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
    return;
  }

  std::vector<Value*> WorkList;

  if (!collectUsesWithPtrTypes(&I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");
  LocalMemAvailable -= AllocaSize;

  Function *F = I.getParent()->getParent();

  // Per-workgroup backing store: one element of the alloca type per workitem.
  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage,
      UndefValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(),
      nullptr,
      GlobalVariable::NotThreadLocal,
      AMDGPUAS::LOCAL_ADDRESS);
  GV->setUnnamedAddr(true);
  GV->setAlignment(I.getAlignment());

  Value *TCntY, *TCntZ;

  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
  Value *TIdX = getWorkitemID(Builder, 0);
  Value *TIdY = getWorkitemID(Builder, 1);
  Value *TIdZ = getWorkitemID(Builder, 2);

  // Flatten the 3-D workitem id:
  //   TID = tid.x * cnt.y * cnt.z + tid.y * cnt.z + tid.z
  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  Value *Indices[] = {
    Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
    TID
  };

  // Replace the alloca with this lane's slot in the LDS array; the pointer
  // changes address space, hence the mutateType before RAUW.
  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  // Rewrite every recorded pointer-typed use into the local address space.
  for (Value *V : WorkList) {
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);

      // The operand's value should be corrected on its own.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);
      continue;
    }

    IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
    if (!Intr) {
      // FIXME: What is this for? It doesn't make sense to promote arbitrary
      // function calls. If the call is to a defined function that can also be
      // promoted, we should be able to do this once that function is also
      // rewritten.

      std::vector<Type*> ArgTypes;
      for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
                                ArgIdx != ArgEnd; ++ArgIdx) {
        ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
      }
      Function *F = Call->getCalledFunction();
      FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
                                                F->isVarArg());
      Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(),
                                             NewType, F->getAttributes());
      Function *NewF = cast<Function>(C);
      Call->setCalledFunction(NewF);
      continue;
    }

    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy: {
      // Recreate the transfer so the intrinsic is re-selected for the new
      // pointer address spaces.
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
                           MemCpy->getLength(), MemCpy->getAlignment(),
                           MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
      Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getRawSource(),
                            MemMove->getLength(), MemMove->getAlignment(),
                            MemMove->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getAlignment(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::invariant_group_barrier:
      Intr->eraseFromParent();
      // FIXME: I think the invariant marker should still theoretically apply,
      // but the intrinsics need to be changed to accept pointers with any
      // address space.
      continue;
    case Intrinsic::objectsize: {
      // Re-declare objectsize for the new pointer type and forward the
      // original arguments.
      Value *Src = Intr->getOperand(0);
      Type *SrcTy = Src->getType()->getPointerElementType();
      Function *ObjectSize = Intrinsic::getDeclaration(Mod,
        Intrinsic::objectsize,
        { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) }
      );

      CallInst *NewCall
        = Builder.CreateCall(ObjectSize, { Src, Intr->getOperand(1) });
      Intr->replaceAllUsesWith(NewCall);
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->dump();
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
}
示例#27
0
void DecomposeInsts::decomposeIntrinsics(BasicBlock* bb)
{
    IRBuilder<> builder(module->getContext());

    for (BasicBlock::iterator instI = bb->begin(), instE = bb->end(); instI != instE; /* empty */) {
        Instruction* inst = instI;

        // Note this increment of instI will skip decompositions of the code
        // inserted to decompose.  E.g., if length -> dot, and dot is also to
        // be decomposed, then the decomposition of dot will be skipped
        // unless instI is reset.
        ++instI;

        IntrinsicInst* intrinsic = dyn_cast<IntrinsicInst>(inst);
        if (! intrinsic)
            continue;

        // Useful preamble for most case
        llvm::Value* arg0 = 0;
        llvm::Value* arg1 = 0;
        llvm::Value* arg2 = 0;
        if (inst->getNumOperands() > 0)
            arg0 = inst->getOperand(0);
        if (inst->getNumOperands() > 1)
            arg1 = inst->getOperand(1);
        if (inst->getNumOperands() > 2)
            arg2 = inst->getOperand(2);
        llvm::Value* newInst = 0;
        Type* instTypes[] = { inst->getType(), inst->getType(), inst->getType(), inst->getType() };
        Type* argTypes[] = { arg0->getType(), arg0->getType(), arg0->getType(), arg0->getType() };
        builder.SetInsertPoint(instI);

        switch (intrinsic->getIntrinsicID()) {
        case Intrinsic::gla_fRadians:
            {
                // always decompose
                // arg0 -> arg0 * pi / 180
                const double pi_over_180 = 0.01745329251994329576923690768489;
                newInst = MultiplyByConstant(builder, arg0, pi_over_180);
                break;
            }
        case Intrinsic::gla_fDegrees:
            {
                // always decompose
                // arg0 -> arg0 * 180 / pi
                const double pi_into_180 = 57.295779513082320876798154814105;
                newInst = MultiplyByConstant(builder, arg0, pi_into_180);
                break;
            }
        case Intrinsic::gla_fMin:
            if (backEnd->decomposeIntrinsic(EDiMin)) {
                //
                // min(a,b) = select (a < b), a, b
                //
                llvm::Value* smeared = Smear(builder, module, arg1, arg0);
                newInst = builder.CreateFCmpOLT(arg0, smeared);
                newInst = builder.CreateSelect(newInst, arg0, smeared);
            }
            break;
        case Intrinsic::gla_fMax:
            if (backEnd->decomposeIntrinsic(EDiMax)) {
                //
                // max(a,b) = select (a > b), a, b
                //
                llvm::Value* smeared = Smear(builder, module, arg1, arg0);
                newInst = builder.CreateFCmpOGT(arg0, smeared);
                newInst = builder.CreateSelect(newInst, arg0, smeared);
            }
            break;
        case Intrinsic::gla_fClamp:
            if (backEnd->decomposeIntrinsic(EDiClamp))
            {
                //
                // Clamp(x, minVal, maxVal) is defined to be min(max(x, minVal), maxVal).
                //
                // The 2nd and 3rd arguments match each other, but not necessarily
                // the 1st argument.  In the decomposition, this difference matches 
                // min/max's difference in their 1st and 2nd arguments.
                //
                argTypes[2] = arg1->getType();  // argTypes[*] start at 0 for the return value, arg* start at 0 for operand 0
                Function* max = Intrinsic::getDeclaration(module, Intrinsic::gla_fMax, makeArrayRef(argTypes, 3));
                Function* min = Intrinsic::getDeclaration(module, Intrinsic::gla_fMin, makeArrayRef(argTypes, 3));
                newInst = builder.CreateCall2(max, arg0, arg1);
                newInst = builder.CreateCall2(min, newInst, arg2);

                // Make next iteration revisit this decomposition, in case min
                // or max are decomposed.
                instI = inst;
                ++instI;
            }
            break;

        case Intrinsic::gla_fAsin:
            if (backEnd->decomposeIntrinsic(EDiAsin)) {
                UnsupportedFunctionality("decomposition of gla_fAsin");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fAcos:
            if (backEnd->decomposeIntrinsic(EDiAcos))
            {
                // TODO: functionality: Do we need to handle domain errors?  (E.g., bad input value)
                //
                // acos(x) ~= sqrt(1-x)*(a + x*(b + x*(c + x*d)))
                // where  a =  1.57079632679
                //        b = -0.213300989
                //        c =  0.077980478
                //        d = -0.0216409
                //
                double a =  1.57079632679;
                double b = -0.213300989;
                double c =  0.077980478;
                double d = -0.0216409;

                // polynomial part, going right to left...
                llvm::Value* poly;
                poly = MultiplyByConstant(builder, arg0, d);
                poly = AddWithConstant(builder, poly, c);
                poly = builder.CreateFMul(arg0, poly);
                poly = AddWithConstant(builder, poly, b);
                poly = builder.CreateFMul(arg0, poly);
                poly = AddWithConstant(builder, poly, a);

                // sqrt part
                Function* sqrt = Intrinsic::getDeclaration(module, Intrinsic::gla_fSqrt, makeArrayRef(argTypes, 2));
                newInst = builder.CreateFNeg(arg0);
                newInst = AddWithConstant(builder, newInst, 1.0);
                newInst = builder.CreateCall(sqrt, newInst);
                newInst = builder.CreateFMul(newInst, poly);
            }
            break;
        case Intrinsic::gla_fAtan:
            if (backEnd->decomposeIntrinsic(EDiAtan)) {
                UnsupportedFunctionality("decomposition of gla_fAtan");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fAtan2:
            if (backEnd->decomposeIntrinsic(EDiAtan2)) {
                UnsupportedFunctionality("decomposition of gla_fAtan2");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fCosh:
            if (backEnd->decomposeIntrinsic(EDiCosh)) {
                UnsupportedFunctionality("decomposition of gla_fCosh");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fSinh:
            if (backEnd->decomposeIntrinsic(EDiSinh)) {
                UnsupportedFunctionality("decomposition of gla_fSinh");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fTanh:
            if (backEnd->decomposeIntrinsic(EDiTanh)) {
                UnsupportedFunctionality("decomposition of gla_fTanh");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fAcosh:
            if (backEnd->decomposeIntrinsic(EDiACosh)) {
                UnsupportedFunctionality("decomposition of gla_fACosh");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fAsinh:
            if (backEnd->decomposeIntrinsic(EDiASinh)) {
                UnsupportedFunctionality("decomposition of gla_fASinh");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fAtanh:
            if (backEnd->decomposeIntrinsic(EDiATanh)) {
                UnsupportedFunctionality("decomposition of gla_fATanh");
                //changed = true;
            }
            break;

        case Intrinsic::gla_fPowi:
            if (backEnd->decomposeIntrinsic(EDiPowi)) {
                UnsupportedFunctionality("decomposition of gla_fPowi");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fExp10:
        case Intrinsic::gla_fExp:
            if ((intrinsic->getIntrinsicID() == Intrinsic::gla_fExp10 && backEnd->decomposeIntrinsic(EDiExp10)) ||
                (intrinsic->getIntrinsicID() == Intrinsic::gla_fExp   && backEnd->decomposeIntrinsic(EDiExp))) {
                //    10^X = 2^(X /(log base 10 of 2))
                // -> 10^X = 2^(X * 3.3219280948873623478703194294894)
                //
                //     e^X = 2^(X /(log base e of 2))
                // ->  e^X = 2^(X * 1.4426950408889634073599246810019)

                //const double inv_log10_e = 2.3025850929940456840179914546844;  // 10 -> e, in case it comes up
                const double inv_log10_2 = 3.3219280948873623478703194294894;  // 10 -> 2
                const double inv_loge_2  = 1.4426950408889634073599246810019;  //  e -> 2

                double multiplier;
                if (intrinsic->getIntrinsicID() == Intrinsic::gla_fExp10)
                    multiplier = inv_log10_2;
                else
                    multiplier = inv_loge_2;

                newInst = MultiplyByConstant(builder, arg0, multiplier);
                Function* exp = Intrinsic::getDeclaration(module, Intrinsic::gla_fExp2, makeArrayRef(argTypes, 2));
                newInst = builder.CreateCall(exp, newInst);
            }
            break;
        case Intrinsic::gla_fLog10:
        case Intrinsic::gla_fLog:
            if ((intrinsic->getIntrinsicID() == Intrinsic::gla_fLog10 && backEnd->decomposeIntrinsic(EDiLog10)) ||
                (intrinsic->getIntrinsicID() == Intrinsic::gla_fLog   && backEnd->decomposeIntrinsic(EDiLog))) {
                //    log base 10 of X = (log base 10 of 2) * (log base 2 of X)
                // -> log base 10 of X = 0.30102999566398119521373889472449 * (log base 2 of X)
                //
                //    log base e  of X = (log base e of 2) * (log base 2 of X)
                // -> log base e  of X = 0.69314718055994530941723212145818 * (log base 2 of X)

                //const double log10_e = 0.43429448190325182765112891891661;  // 10 -> e, in case it comes up
                const double log10_2 = 0.30102999566398119521373889472449;  // 10 -> 2
                const double loge_2  = 0.69314718055994530941723212145818;  //  e -> 2

                double multiplier;
                if (intrinsic->getIntrinsicID() == Intrinsic::gla_fLog10)
                    multiplier = log10_2;
                else
                    multiplier = loge_2;

                Function* log = Intrinsic::getDeclaration(module, Intrinsic::gla_fLog2, makeArrayRef(argTypes, 2));
                newInst = builder.CreateCall(log, arg0);
                newInst = MultiplyByConstant(builder, newInst, multiplier);
            }
            break;

        case Intrinsic::gla_fInverseSqrt:
            if (backEnd->decomposeIntrinsic(EDiInverseSqrt)) {
                Function* sqrt = Intrinsic::getDeclaration(module, Intrinsic::gla_fSqrt, makeArrayRef(argTypes, 2));
                newInst = builder.CreateCall(sqrt, arg0);
                newInst = builder.CreateFDiv(MakeFloatConstant(module->getContext(), 1.0), newInst);
            }
            break;
        case Intrinsic::gla_fFraction:
            if (backEnd->decomposeIntrinsic(EDiFraction)) {
                UnsupportedFunctionality("decomposition of gla_fFraction");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fSign:
            if (backEnd->decomposeIntrinsic(EDiSign)) {
                UnsupportedFunctionality("decomposition of gla_fSign");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fModF:
            if (backEnd->decomposeIntrinsic(EDiModF)) {
                UnsupportedFunctionality("decomposition of gla_fModF");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fMix:
            if (backEnd->decomposeIntrinsic(EDiMix)) {
                //
                // genType mix (x, y, a) = x * (1 - a) + y * a
                //
                llvm::Value* t;
                t = builder.CreateFNeg(arg2);
                t = AddWithConstant(builder, t, 1.0);
                t = builder.CreateFMul(arg0, t);
                newInst = builder.CreateFMul(arg1, arg2);
                newInst = builder.CreateFAdd(t, newInst);
            }
            break;
        case Intrinsic::gla_fStep:
            if (backEnd->decomposeIntrinsic(EDiStep))
            {
                //
                // step(edge, x) is defined to be 0.0 if x < edge, otherwise 1.0.
                //
                llvm::FCmpInst::Predicate predicate = llvm::FCmpInst::FCMP_OLT;
                llvm::Value* condition = builder.CreateFCmp(predicate, arg1, arg0);
                newInst = builder.CreateSelect(condition, VectorizeConstant(GetComponentCount(arg1), MakeFloatConstant(module->getContext(), 0.0)),
                                                          VectorizeConstant(GetComponentCount(arg1), MakeFloatConstant(module->getContext(), 1.0)));
            }
            break;
        case Intrinsic::gla_fSmoothStep:
            if (backEnd->decomposeIntrinsic(EDiSmoothStep)) {
                //
                // smoothstep (edge0, edge1, x) is defined to be
                //
                //   t = clamp((x – edge0) / (edge1 – edge0), 0, 1)
                //   t * t * (3 – 2 * t)
                //
                // where edge* can be scalar even if x is vector.
                //
                llvm::Value* smeared0 = Smear(builder, module, arg0, arg2);
                llvm::Value* smeared1 = Smear(builder, module, arg1, arg2);
                llvm::Value* numerator   = builder.CreateFSub(arg2, smeared0, "numerator");
                llvm::Value* denominator = builder.CreateFSub(smeared1, smeared0, "denominator");
                llvm::Value* quotient    = builder.CreateFDiv(numerator, denominator, "quotient");
                llvm::Value* zero = MakeFloatConstant(module->getContext(), 0.0);
                llvm::Value* one  = MakeFloatConstant(module->getContext(), 1.0);
                Type* newArgTypes[] = { quotient->getType(), quotient->getType(), zero->getType(), one->getType() };
                Function* clamp = Intrinsic::getDeclaration(module, Intrinsic::gla_fClamp, newArgTypes);
                llvm::Value* t = builder.CreateCall3(clamp, quotient, zero, one);
                newInst = MultiplyByConstant(builder, t, 2.0);
                newInst = SubFromConstant(builder, 3.0, newInst);
                newInst = builder.CreateFMul(t, newInst);
                newInst = builder.CreateFMul(t, newInst);
                
                // Make next iteration revisit this decomposition, in case clamp is
                // decomposed.
                instI = inst;
                ++instI;
            }
            break;
        case Intrinsic::gla_fIsNan:
            if (backEnd->decomposeIntrinsic(EDiIsNan)) {
                UnsupportedFunctionality("decomposition of gla_fIsNan");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fFma:
            if (backEnd->decomposeIntrinsic(EDiFma)) {
                UnsupportedFunctionality("decomposition of gla_Fma");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fPackUnorm2x16:
            if (backEnd->decomposeIntrinsic(EDiPackUnorm2x16)) {
                UnsupportedFunctionality("decomposition of gla_fPackUnorm2x16");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fPackUnorm4x8:
            if (backEnd->decomposeIntrinsic(EDiPackUnorm4x8)) {
                UnsupportedFunctionality("decomposition of gla_fPackUnorm4x8");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fPackSnorm4x8:
            if (backEnd->decomposeIntrinsic(EDiPackSnorm4x8)) {
                UnsupportedFunctionality("decomposition of gla_fPackSnorm4x8");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fUnpackUnorm2x16:
            if (backEnd->decomposeIntrinsic(EDiUnpackUnorm2x16)) {
                UnsupportedFunctionality("decomposition of gla_fUnpackUnorm2x16");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fUnpackUnorm4x8:
            if (backEnd->decomposeIntrinsic(EDiUnpackUnorm4x8)) {
                UnsupportedFunctionality("decomposition of gla_fUnpackUnorm4x8");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fUnpackSnorm4x8:
            if (backEnd->decomposeIntrinsic(EDiUnpackSnorm4x8)) {
                UnsupportedFunctionality("decomposition of gla_fUnpackSnorm4x8");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fPackDouble2x32:
            if (backEnd->decomposeIntrinsic(EDiPackDouble2x32)) {
                UnsupportedFunctionality("decomposition of gla_fPackDouble2x32");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fUnpackDouble2x32:
            if (backEnd->decomposeIntrinsic(EDiUnpackDouble2x32)) {
                UnsupportedFunctionality("decomposition of gla_fUnpackDouble2x32");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fLength:
            if (backEnd->decomposeIntrinsic(EDiLength)) {
                if (GetComponentCount(arg0) > 1) {
                    Function* dot = GetDotIntrinsic(module, argTypes);
                    newInst = builder.CreateCall2(dot, arg0, arg0);

                    Function* sqrt = Intrinsic::getDeclaration(module, Intrinsic::gla_fSqrt, makeArrayRef(instTypes, 2));
                    newInst = builder.CreateCall(sqrt, newInst);
                } else {
                    Function* abs = Intrinsic::getDeclaration(module, Intrinsic::gla_fAbs, makeArrayRef(instTypes, 2));
                    newInst = builder.CreateCall(abs, arg0);
                }

                // Make next iteration revisit this decomposition, in case dot is
                // decomposed.
                instI = inst;
                ++instI;
            }
            break;
        case Intrinsic::gla_fDistance:
            if (backEnd->decomposeIntrinsic(EDiDistance)) {
                newInst = builder.CreateFSub(arg0, arg1);
                llvm::Type* types[] = { GetBasicType(newInst), newInst->getType() };
                Function* length = Intrinsic::getDeclaration(module, Intrinsic::gla_fLength, types);
                newInst = builder.CreateCall(length, newInst);

                // Make next iteration revisit this decomposition, in case length is
                // decomposed.
                instI = inst;
                ++instI;
            }
            break;
        case Intrinsic::gla_fDot2:
            if (backEnd->decomposeIntrinsic(EDiDot)) {
                newInst = builder.CreateFMul(arg0, arg1);
                llvm::Value* element0 = builder.CreateExtractElement(newInst, MakeUnsignedConstant(module->getContext(), 0));
                llvm::Value* element1 = builder.CreateExtractElement(newInst, MakeUnsignedConstant(module->getContext(), 1));
                newInst = builder.CreateFAdd(element0, element1);
            }
            break;
        case Intrinsic::gla_fDot3:
            if (backEnd->decomposeIntrinsic(EDiDot)) {
                newInst = builder.CreateFMul(arg0, arg1);
                arg0 = newInst;
                llvm::Value* element0 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 0));
                llvm::Value* element1 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 1));
                newInst = builder.CreateFAdd(element0, element1);
                llvm::Value* element = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 2));
                newInst = builder.CreateFAdd(newInst, element);
            }
            break;
        case Intrinsic::gla_fDot4:
            if (backEnd->decomposeIntrinsic(EDiDot)) {
                newInst = builder.CreateFMul(arg0, arg1);
                arg0 = newInst;
                llvm::Value* element0 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 0));
                llvm::Value* element1 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 1));
                newInst = builder.CreateFAdd(element0, element1);
                for (int el = 2; el < 4; ++el) {
                    llvm::Value* element = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), el));
                    newInst = builder.CreateFAdd(newInst, element);
                }
            }
            break;
        case Intrinsic::gla_fCross:
            if (backEnd->decomposeIntrinsic(EDiCross)) {
                // (a1, a2, a3) X (b1, b2, b3) -> (a2*b3 - a3*b2, a3*b1 - a1*b3, a1*b2 - a2*b1)

                llvm::Value* a1 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 0));
                llvm::Value* a2 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 1));
                llvm::Value* a3 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 2));

                llvm::Value* b1 = builder.CreateExtractElement(arg1, MakeUnsignedConstant(module->getContext(), 0));
                llvm::Value* b2 = builder.CreateExtractElement(arg1, MakeUnsignedConstant(module->getContext(), 1));
                llvm::Value* b3 = builder.CreateExtractElement(arg1, MakeUnsignedConstant(module->getContext(), 2));

                llvm::Value* empty = llvm::UndefValue::get(arg0->getType());

                bool scalarized = false;

                if (scalarized) {
                    // do it all with scalars

                    // a2*b3 - a3*b2
                    llvm::Value* p1 = builder.CreateFMul(a2, b3);
                    llvm::Value* p2 = builder.CreateFMul(a3, b2);
                    llvm::Value* element = builder.CreateFSub(p1, p2);
                    newInst = builder.CreateInsertElement(empty, element, MakeUnsignedConstant(module->getContext(), 0));

                    // a3*b1 - a1*b3
                    p1 = builder.CreateFMul(a3, b1);
                    p2 = builder.CreateFMul(a1, b3);
                    element = builder.CreateFSub(p1, p2);
                    newInst = builder.CreateInsertElement(newInst, element, MakeUnsignedConstant(module->getContext(), 1));

                    // a1*b2 - a2*b1
                    p1 = builder.CreateFMul(a1, b2);
                    p2 = builder.CreateFMul(a2, b1);
                    element = builder.CreateFSub(p1, p2);
                    newInst = builder.CreateInsertElement(newInst, element, MakeUnsignedConstant(module->getContext(), 2));
                } else {
                    // do it all with vectors

                    // (a2, a3, a1)
                    llvm::Value* aPerm;
                    aPerm = builder.CreateInsertElement(empty, a2, MakeUnsignedConstant(module->getContext(), 0));
                    aPerm = builder.CreateInsertElement(aPerm, a3, MakeUnsignedConstant(module->getContext(), 1));
                    aPerm = builder.CreateInsertElement(aPerm, a1, MakeUnsignedConstant(module->getContext(), 2));

                    // (b3, b1, b2)
                    llvm::Value* bPerm;
                    bPerm = builder.CreateInsertElement(empty, b3, MakeUnsignedConstant(module->getContext(), 0));
                    bPerm = builder.CreateInsertElement(bPerm, b1, MakeUnsignedConstant(module->getContext(), 1));
                    bPerm = builder.CreateInsertElement(bPerm, b2, MakeUnsignedConstant(module->getContext(), 2));

                    // first term computation
                    llvm::Value* firstTerm = builder.CreateFMul(aPerm, bPerm);

                    // (a3, a1, a2)
                    aPerm = builder.CreateInsertElement(empty, a3, MakeUnsignedConstant(module->getContext(), 0));
                    aPerm = builder.CreateInsertElement(aPerm, a1, MakeUnsignedConstant(module->getContext(), 1));
                    aPerm = builder.CreateInsertElement(aPerm, a2, MakeUnsignedConstant(module->getContext(), 2));

                    // (b2, b3, b1)
                    bPerm = builder.CreateInsertElement(empty, b2, MakeUnsignedConstant(module->getContext(), 0));
                    bPerm = builder.CreateInsertElement(bPerm, b3, MakeUnsignedConstant(module->getContext(), 1));
                    bPerm = builder.CreateInsertElement(bPerm, b1, MakeUnsignedConstant(module->getContext(), 2));

                    // second term computation
                    newInst = builder.CreateFMul(aPerm, bPerm);

                    // Finish it off
                    newInst = builder.CreateFSub(firstTerm, newInst);
                }
            }
            break;
        case Intrinsic::gla_fNormalize:
            if (backEnd->decomposeIntrinsic(EDiNormalize)) {
                if (GetComponentCount(arg0) > 1) {
                    Function* dot = GetDotIntrinsic(module, argTypes);
                    newInst = builder.CreateCall2(dot, arg0, arg0);

                    llvm::Type* type[] = { newInst->getType(), newInst->getType() };
                    Function* inverseSqrt = Intrinsic::getDeclaration(module, Intrinsic::gla_fInverseSqrt, type);
                    newInst = builder.CreateCall(inverseSqrt, newInst);

                    // smear it
                    llvm::Value* smeared = llvm::UndefValue::get(arg0->getType());
                    for (int c = 0; c < GetComponentCount(arg0); ++c)
                        smeared = builder.CreateInsertElement(smeared, newInst, MakeIntConstant(module->getContext(), c));

                    newInst = builder.CreateFMul(arg0, smeared);
                } else {
                    newInst = MakeFloatConstant(module->getContext(), 1.0);
                }

                // Make next iteration revisit this decomposition, in case dot or inverse-sqrt
                // are decomposed.
                instI = inst;
                ++instI;
            }
            break;
        case Intrinsic::gla_fNormalize3D:
            if (backEnd->decomposeIntrinsic(EDiNormalize3D)) {

                // Note:  This does a 3D normalize on a vec3 or vec4.  The width of arg0 does
                // not determine that width of the dot-product input, the "3" in the "3D" does.

                llvm::Type* types[] = { GetBasicType(argTypes[0]), argTypes[0], argTypes[1] };
                Function* dot = Intrinsic::getDeclaration(module, Intrinsic::gla_fDot3, types);
                newInst = builder.CreateCall2(dot, arg0, arg0);

                llvm::Type* type[] = { newInst->getType(), newInst->getType() };
                Function* inverseSqrt = Intrinsic::getDeclaration(module, Intrinsic::gla_fInverseSqrt, type);
                newInst = builder.CreateCall(inverseSqrt, newInst);

                // smear it
                llvm::Value* smeared = llvm::UndefValue::get(arg0->getType());
                for (int c = 0; c < GetComponentCount(arg0); ++c)
                    smeared = builder.CreateInsertElement(smeared, newInst, MakeIntConstant(module->getContext(), c));

                // If we're 4-wide, copy over the original w component
                if (GetComponentCount(arg0) == 4)
                    smeared = builder.CreateInsertElement(smeared, arg0, MakeIntConstant(module->getContext(), 4));

                newInst = builder.CreateFMul(arg0, smeared);

                // Make next iteration revisit this decomposition, in case dot or inverse-sqrt
                // are decomposed.
                instI = inst;
                ++instI;
            }
            break;
        case Intrinsic::gla_fLit:
            if (backEnd->decomposeIntrinsic(EDiLit)) {
                UnsupportedFunctionality("decomposition of gla_fLit");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fFaceForward:
            if (backEnd->decomposeIntrinsic(EDiFaceForward)) {
                //
                // faceForward(N, I, Nref) is defined to be N if dot(Nref, I) < 0, otherwise return –N.
                //
                UnsupportedFunctionality("decomposition of gla_fFaceForward");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fReflect:
            if (backEnd->decomposeIntrinsic(EDiReflect))
            {
                //
                // reflect(I, N) is defined to be I – 2 * dot(N, I) * N,
                // where N may be assumed to be normalized.
                //
                // Note if the number of components is 1, then N == 1 and
                // this turns into I - 2*I, or -I.
                //
                if (GetComponentCount(arg0) > 1) {
                    Function* dot = GetDotIntrinsic(module, argTypes);
                    newInst = builder.CreateCall2(dot, arg0, arg1);
                    newInst = MultiplyByConstant(builder, newInst, 2.0);

                    // smear this back up to a vector again
                    llvm::Value* smeared = llvm::UndefValue::get(arg0->getType());
                    for (int c = 0; c < GetComponentCount(arg0); ++c)
                        smeared = builder.CreateInsertElement(smeared, newInst, MakeIntConstant(module->getContext(), c));

                    newInst = builder.CreateFMul(smeared, arg1);
                    newInst = builder.CreateFSub(arg0, newInst);
                } else {
                    newInst = builder.CreateFNeg(arg0);
                }

                // Make next iteration revisit this decomposition, in case dot
                // is decomposed
                instI = inst;
                ++instI;
            }
            break;
        case Intrinsic::gla_fRefract:
            if (backEnd->decomposeIntrinsic(EDiRefract)) {
                UnsupportedFunctionality("decomposition of gla_fRefract");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fFilterWidth:
            if (backEnd->decomposeIntrinsic(EDiFilterWidth)) {
                // filterWidth = abs(dFdx(p)) + abs(dFdy(p))
                Function* dFdx = Intrinsic::getDeclaration(module, Intrinsic::gla_fDFdx, makeArrayRef(argTypes, 2));
                Function* dFdy = Intrinsic::getDeclaration(module, Intrinsic::gla_fDFdy, makeArrayRef(argTypes, 2));
                Function*  abs = Intrinsic::getDeclaration(module, Intrinsic::gla_fAbs,  makeArrayRef(instTypes, 2));
                llvm::Value* dx = builder.CreateCall(dFdx, arg0);
                llvm::Value* dy = builder.CreateCall(dFdy, arg0);
                dx = builder.CreateCall(abs, dx);
                dy = builder.CreateCall(abs, dy);
                newInst = builder.CreateFAdd(dx, dy);
            }
            break;
        case Intrinsic::gla_fFixedTransform:
            if (backEnd->decomposeIntrinsic(EDiFixedTransform)) {
                UnsupportedFunctionality("decomposition of gla_fFixedTransform");
                //changed = true;
            }
            break;

        case Intrinsic::gla_any:
            if (backEnd->decomposeIntrinsic(EDiAny)) {
                if (GetComponentCount(arg0) == 1)
                    UnsupportedFunctionality("any() on a scalar");

                newInst = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 0));
                for (int c = 1; c < GetComponentCount(arg0); ++c) {
                    llvm::Value* comp = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), c));
                    newInst = builder.CreateOr(newInst, comp);
                }
            }
            break;
        case Intrinsic::gla_all:
            if (backEnd->decomposeIntrinsic(EDiAll)) {
                if (GetComponentCount(arg0) == 1)
                    UnsupportedFunctionality("all() on a scalar");

                newInst = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 0));
                for (int c = 1; c < GetComponentCount(arg0); ++c) {
                    llvm::Value* comp = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), c));
                    newInst = builder.CreateAnd(newInst, comp);
                }
            }
            break;
        case Intrinsic::gla_not:
            if (backEnd->decomposeIntrinsic(EDiNot)) {
                if (GetComponentCount(arg0) == 1)
                    UnsupportedFunctionality("not() on a scalar");

                newInst = builder.CreateNot(arg0);
            }
            break;
        case Intrinsic::gla_fTextureSample:
        case Intrinsic::gla_fTextureSampleLodRefZ:
        case Intrinsic::gla_fTextureSampleLodRefZOffset:
        case Intrinsic::gla_fTextureSampleLodRefZOffsetGrad:
            if (backEnd->decomposeIntrinsic(EDiTextureProjection)) {

                // if projection flag is set, divide all coordinates (and refZ) by projection
                int texFlags = GetConstantInt(intrinsic->getArgOperand(GetTextureOpIndex(ETOFlag)));
                if (texFlags & ETFProjected) {

                    // insert before intrinsic since we are not replacing it
                    builder.SetInsertPoint(inst);

                    // turn off projected flag to reflect decomposition
                    texFlags &= ~ETFProjected;

                    llvm::Value* coords = intrinsic->getArgOperand(GetTextureOpIndex(ETOCoord));

                    // determine how many channels are live after decomposition
                    int newCoordWidth = 0;
                    switch (GetConstantInt(intrinsic->getArgOperand(gla::ETOSamplerType))) {
                    case gla::ESamplerBuffer:
                    case gla::ESampler1D:      newCoordWidth = 1;  break;
                    case gla::ESampler2D:
                    case gla::ESampler2DRect:
                    case gla::ESampler2DMS:    newCoordWidth = 2;  break;
                    case gla::ESampler3D:      newCoordWidth = 3;  break;
                    case gla::ESamplerCube:
                        gla::UnsupportedFunctionality("projection with cube sampler");
                        break;
                    default:
                        assert(0 && "Unknown sampler type");
                        break;
                    }

                    if (texFlags & gla::ETFArrayed)
                        gla::UnsupportedFunctionality("projection with arrayed sampler");

                    // projection resides in last component
                    llvm::Value* projIdx = MakeUnsignedConstant(module->getContext(), GetComponentCount(coords) - 1);
                    llvm::Value* divisor = builder.CreateExtractElement(coords, projIdx);

                    llvm::Type* newCoordType;
                    if (newCoordWidth > 1)
                        newCoordType = llvm::VectorType::get(GetBasicType(coords), newCoordWidth);
                    else
                        newCoordType = GetBasicType(coords);

                    // create space to hold results
                    llvm::Value* newCoords   = llvm::UndefValue::get(newCoordType);
                    llvm::Value* smearedProj = llvm::UndefValue::get(newCoordType);

                    if (newCoordWidth > 1) {
                        for (int i = 0; i < newCoordWidth; ++i) {
                            llvm::Value* idx = MakeUnsignedConstant(module->getContext(), i);

                            // smear projection
                            smearedProj = builder.CreateInsertElement(smearedProj, divisor, idx);

                            // shrink coordinates to remove projection component
                            llvm::Value* oldCoord = builder.CreateExtractElement(coords, idx);
                            newCoords = builder.CreateInsertElement(newCoords, oldCoord, idx);
                        }
                    } else {
                        smearedProj = divisor;
                        newCoords = builder.CreateExtractElement(coords, MakeUnsignedConstant(module->getContext(), 0));
                    }

                    // divide coordinates
                    newCoords = builder.CreateFDiv(newCoords, smearedProj);

                    //
                    // Remaining code declares new intrinsic and modifies call arguments
                    //

                    // build up argTypes for flexible parameters, including result
                    llvm::SmallVector<llvm::Type*, 5> types;

                    // result type
                    types.push_back(intrinsic->getType());

                    // use new coords to reflect shrink
                    types.push_back(newCoords->getType());

                    // add offset
                    switch (intrinsic->getIntrinsicID()) {
                    case Intrinsic::gla_fTextureSampleLodRefZOffset:
                    case Intrinsic::gla_fTextureSampleLodRefZOffsetGrad:
                        types.push_back(intrinsic->getArgOperand(ETOOffset)->getType());
                    default:
                        break;
                    }

                    // add gradients
                    switch (intrinsic->getIntrinsicID()) {
                    case Intrinsic::gla_fTextureSampleLodRefZOffsetGrad:
                        types.push_back(intrinsic->getArgOperand(ETODPdx)->getType());
                        types.push_back(intrinsic->getArgOperand(ETODPdy)->getType());
                    default:
                        break;
                    }

                    // declare the new intrinsic
                    // TODO: functionality: texturing correctness: is this getting the correct non-projective form?
                    Function* texture = Intrinsic::getDeclaration(module, intrinsic->getIntrinsicID(), types);

                    // modify arguments to match new intrinsic
                    intrinsic->setCalledFunction(texture);
                    intrinsic->setArgOperand(ETOFlag, MakeUnsignedConstant(module->getContext(), texFlags));
                    intrinsic->setArgOperand(ETOCoord, newCoords);

                    switch (intrinsic->getIntrinsicID()) {
                    case Intrinsic::gla_fTextureSampleLodRefZ:
                    case Intrinsic::gla_fTextureSampleLodRefZOffset:
                    case Intrinsic::gla_fTextureSampleLodRefZOffsetGrad:
                        intrinsic->setArgOperand(ETORefZ, builder.CreateFDiv(intrinsic->getArgOperand(ETORefZ), divisor));                        
                    default:
                        break;
                    }

                    // mark our change, but don't replace the intrinsic
                    changed = true;
                }
            }
            break;

        default:
            // The cases above needs to be comprehensive in terms of checking
            // for what intrinsics to decompose.  If not there the assumption is
            // it never needs to be decomposed.
            break;
        }

        if (newInst) {
            inst->replaceAllUsesWith(newInst);
            inst->dropAllReferences();
            inst->eraseFromParent();
            changed = true;
        }
    }
}
Example #28
0
File: DefUse.cpp  Project: tomsik68/dg
/// Add data-dependence edges for a call to an LLVM intrinsic.
///
/// Intrinsics that touch memory through pointer operands (memcpy, memmove,
/// memset, va_start) get an edge for the written pointer (dest) and, when
/// present, the read pointer (src). Intrinsics known not to access memory
/// are handled with direct def-use edges elsewhere and return early here.
///
/// \param callNode  dependence-graph node representing the call site
/// \param CI        the call instruction; must be a call to an intrinsic
void LLVMDefUseAnalysis::handleIntrinsicCall(LLVMNode *callNode,
                                             CallInst *CI)
{
    // Remember which call sites we already warned about so each
    // "not implemented" message is printed only once.
    static std::set<Instruction *> warnings;
    IntrinsicInst *I = cast<IntrinsicInst>(CI);
    // Pointer whose pointee is written (dest) / read (src) by the intrinsic.
    // FIX: 'dest' was previously declared without an initializer (only 'src'
    // got nullptr), so the assert below — and the release-build path into
    // addDataDependence — could read an indeterminate pointer if a future
    // case reached the tail without setting it. Initialize both.
    Value *dest = nullptr;
    Value *src = nullptr;

    switch (I->getIntrinsicID())
    {
        case Intrinsic::memmove:
        case Intrinsic::memcpy:
            // These also read through their second pointer operand.
            src = I->getOperand(1);
            // fall-through
        case Intrinsic::memset:
        case Intrinsic::vastart:
            dest = I->getOperand(0);
            break;
        case Intrinsic::vaend:
        case Intrinsic::lifetime_start:
        case Intrinsic::lifetime_end:
        case Intrinsic::trap:
            // nothing to be done here
            return;
        case Intrinsic::bswap:
        case Intrinsic::prefetch:
        case Intrinsic::objectsize:
        case Intrinsic::sadd_with_overflow:
        case Intrinsic::uadd_with_overflow:
        case Intrinsic::ssub_with_overflow:
        case Intrinsic::usub_with_overflow:
        case Intrinsic::smul_with_overflow:
        case Intrinsic::umul_with_overflow:
            // nothing to be done, direct def-use edges
            // will be added later
            assert(I->getCalledFunction()->doesNotAccessMemory());
            return;
        case Intrinsic::stacksave:
        case Intrinsic::stackrestore:
            if (warnings.insert(CI).second)
                llvmutils::printerr("WARN: stack save/restore not implemented", CI);
            return;
        default:
            llvmutils::printerr("WARNING: unhandled intrinsic call", I);
            // if it does not access memory, we can just add
            // direct def-use edges
            if (I->getCalledFunction()->doesNotAccessMemory())
                return;

            assert (0 && "Unhandled intrinsic that accesses memory");
            // for release builds, do the best we can here
            handleUndefinedCall(callNode, CI);
            return;
    }

    // we must have dest set
    assert(dest);

    // these functions touch the memory of the pointers
    addDataDependence(callNode, CI, dest, Offset::UNKNOWN /* FIXME */);

    if (src)
        addDataDependence(callNode, CI, src, Offset::UNKNOWN /* FIXME */);
}
Example #29
0
/// Returns true if the beginning of this instruction can be safely shortened
/// in length.
static bool isShortenableAtTheBeginning(Instruction *I) {
  // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
  // easily done by offsetting the source address.
  auto *MemIntr = dyn_cast<IntrinsicInst>(I);
  if (!MemIntr)
    return false;
  // Only a memset's leading bytes can currently be trimmed away.
  return MemIntr->getIntrinsicID() == Intrinsic::memset;
}