// Walk backwards from InstRbegin in BB, and recursively through every
// predecessor, verifying that each path is preceded by a call to
// llvm.eh.begincatch before anything that ends the search.
//
// Returns true if every backward path hits an eh_begincatch first.  If a
// second eh_endcatch is found before a begincatch, it is reported through
// *SecondEndCatch and false is returned.  Hitting a landingpad or a block
// with no predecessors also fails the search.  VisitedBlocks records blocks
// already examined so CFG cycles terminate.
static bool allPredCameFromBeginCatch(
    BasicBlock *BB, BasicBlock::reverse_iterator InstRbegin,
    IntrinsicInst **SecondEndCatch, SmallSet<BasicBlock *, 4> &VisitedBlocks) {
  VisitedBlocks.insert(BB);
  // Look for a begincatch in this block.
  for (BasicBlock::reverse_iterator RI = InstRbegin, RE = BB->rend(); RI != RE;
       ++RI) {
    IntrinsicInst *IC = dyn_cast<IntrinsicInst>(&*RI);
    if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch)
      return true;
    // If we find another end catch before we find a begin catch, that's
    // an error.
    if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) {
      *SecondEndCatch = IC;
      return false;
    }
    // If we encounter a landingpad instruction, the search failed.
    if (isa<LandingPadInst>(*RI))
      return false;
  }
  // If while searching we find a block with no predeccesors,
  // the search failed.
  if (pred_empty(BB))
    return false;
  // Search any predecessors we haven't seen before.
  for (BasicBlock *Pred : predecessors(BB)) {
    if (VisitedBlocks.count(Pred))
      continue;
    // Recurse starting from the predecessor's last instruction.
    if (!allPredCameFromBeginCatch(Pred, Pred->rbegin(), SecondEndCatch,
                                   VisitedBlocks))
      return false;
  }
  return true;
}
// Forward counterpart of the begincatch search: scan forward from InstBegin
// in BB, and recursively through every successor, verifying that each path
// reaches a call to llvm.eh.endcatch.
//
// Returns true if every forward path hits an eh_endcatch first.  If a
// second eh_begincatch is seen before an endcatch, it is reported through
// *SecondBeginCatch and false is returned.  Reaching a block with no
// successors also fails the search.  VisitedBlocks records blocks already
// examined so CFG cycles terminate.
static bool allSuccessorsReachEndCatch(BasicBlock *BB,
                                       BasicBlock::iterator InstBegin,
                                       IntrinsicInst **SecondBeginCatch,
                                       SmallSet<BasicBlock *, 4> &VisitedBlocks) {
  VisitedBlocks.insert(BB);
  for (BasicBlock::iterator I = InstBegin, E = BB->end(); I != E; ++I) {
    IntrinsicInst *IC = dyn_cast<IntrinsicInst>(I);
    if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch)
      return true;
    // If we find another begincatch while looking for an endcatch,
    // that's also an error.
    if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) {
      *SecondBeginCatch = IC;
      return false;
    }
  }
  // If we reach a block with no successors while searching, the
  // search has failed.
  if (succ_empty(BB))
    return false;
  // Otherwise, search all of the successors.
  for (BasicBlock *Succ : successors(BB)) {
    if (VisitedBlocks.count(Succ))
      continue;
    if (!allSuccessorsReachEndCatch(Succ, Succ->begin(), SecondBeginCatch,
                                    VisitedBlocks))
      return false;
  }
  return true;
}
// Try to simplify a call instruction during CodeGenPrepare.  Two cases are
// handled:
//  1. llvm.objectsize.* is folded to its conservative constant answer
//     (0 when the "min" flag argument is 1, otherwise -1).
//  2. Fortified _chk libcalls with the default "unknown" object size are
//     lowered to the plain libcall via CodeGenPrepareFortifiedLibCalls.
// Returns true if the call was changed.
bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
  // Lower all uses of llvm.objectsize.*
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
    // Arg 1 selects min (return 0) vs max (return -1) when size is unknown.
    bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
    const Type *ReturnTy = CI->getType();
    Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
    CI->replaceAllUsesWith(RetVal);
    CI->eraseFromParent();
    return true;
  }

  // From here on out we're working with named functions.
  if (CI->getCalledFunction() == 0) return false;

  // We'll need TargetData from here on out.
  const TargetData *TD = TLI ? TLI->getTargetData() : 0;
  if (!TD) return false;

  // Lower all default uses of _chk calls.  This is very similar
  // to what InstCombineCalls does, but here we are only lowering calls
  // that have the default "don't know" as the objectsize.  Anything else
  // should be left alone.
  CodeGenPrepareFortifiedLibCalls Simplifier;
  return Simplifier.fold(CI, TD);
}
// Model the effect of an intrinsic call on the reaching-definitions map.
// memset/memcpy/memmove define the memory their first operand points to;
// any other intrinsic falls back to the conservative undefined-call
// handling.  Returns true if the def-map changed.
bool LLVMReachingDefsAnalysis::handleIntrinsicCall(LLVMNode *callNode,
                                                   CallInst *CI,
                                                   DefMap *df) {
  bool changed = false;
  IntrinsicInst *I = cast<IntrinsicInst>(CI);
  Value *dest;

  switch (I->getIntrinsicID()) {
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
    case Intrinsic::memset:
      // Operand 0 is the destination pointer for all three.
      dest = I->getOperand(0);
      break;
    default:
      // Unknown intrinsic: treat like a call with unknown effects.
      return handleUndefinedCall(callNode, CI, df);
  }

  LLVMNode *destNode = getOperand(callNode, dest, 1);
  assert(destNode && "No operand for intrinsic call");
  for (const Pointer& ptr : destNode->getPointsTo()) {
    // we could compute all the concrete offsets, but
    // these functions usually set the whole memory,
    // so if we use UNKNOWN_OFFSET, the effect is the same
    changed |= df->add(Pointer(ptr.obj, UNKNOWN_OFFSET), callNode);
  }

  return changed;
}
/// getLocForWrite - Return a Location stored to by the specified instruction. /// If isRemovable returns true, this function and getLocForRead completely /// describe the memory operations for this instruction. static MemoryLocation getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) return MemoryLocation::get(SI); if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) { // memcpy/memmove/memset. MemoryLocation Loc = MemoryLocation::getForDest(MI); return Loc; } IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst); if (!II) return MemoryLocation(); switch (II->getIntrinsicID()) { default: return MemoryLocation(); // Unhandled intrinsic. case Intrinsic::init_trampoline: // FIXME: We don't know the size of the trampoline, so we can't really // handle it here. return MemoryLocation(II->getArgOperand(0)); case Intrinsic::lifetime_end: { uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(); return MemoryLocation(II->getArgOperand(1), Len); } } }
// Promote a sub-32-bit bitreverse intrinsic to the 32-bit form: zero-extend
// the operand to i32, reverse with the i32 intrinsic, shift the result right
// past the reversed padding bits, and truncate back to the original type.
// Always returns true; the original instruction is replaced and erased.
bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
    IntrinsicInst &I) const {
  assert(I.getIntrinsicID() == Intrinsic::bitreverse &&
         "I must be bitreverse intrinsic");
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *I32Ty = getI32Ty(Builder, I.getType());
  Function *I32 =
      Intrinsic::getDeclaration(Mod, Intrinsic::bitreverse, { I32Ty });
  Value *ExtOp = Builder.CreateZExt(I.getOperand(0), I32Ty);
  Value *ExtRes = Builder.CreateCall(I32, { ExtOp });
  // The meaningful bits land in the high end of the 32-bit result; shift
  // them down by the number of zero bits the extension introduced.
  Value *LShrOp =
      Builder.CreateLShr(ExtRes, 32 - getBaseElementBitWidth(I.getType()));
  Value *TruncRes =
      Builder.CreateTrunc(LShrOp, I.getType());

  I.replaceAllUsesWith(TruncRes);
  I.eraseFromParent();

  return true;
}
/// Escape RegNode so that we can access it from child handlers. Find the call /// to frameescape, if any, in the entry block and append RegNode to the list /// of arguments. int WinEHStatePass::escapeRegNode(Function &F) { // Find the call to frameescape and extract its arguments. IntrinsicInst *EscapeCall = nullptr; for (Instruction &I : F.getEntryBlock()) { IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I); if (II && II->getIntrinsicID() == Intrinsic::frameescape) { EscapeCall = II; break; } } SmallVector<Value *, 8> Args; if (EscapeCall) { auto Ops = EscapeCall->arg_operands(); Args.append(Ops.begin(), Ops.end()); } Args.push_back(RegNode); // Replace the call (if it exists) with new one. Otherwise, insert at the end // of the entry block. IRBuilder<> Builder(&F.getEntryBlock(), EscapeCall ? EscapeCall : F.getEntryBlock().end()); Builder.CreateCall(FrameEscape, Args); if (EscapeCall) EscapeCall->eraseFromParent(); return Args.size() - 1; }
// Match the pattern log2(0.5 * Y) where Op and the multiply result each have
// exactly one use, and both the log2 call and the fmul carry the
// unsafe-algebra (fast-math) flag.
//
// On a full match, Log2 receives the log2 intrinsic call and Y the non-0.5
// multiplicand.  Note that Log2 is assigned as soon as the log2 call is
// validated, before the multiply is checked; callers must test Y to decide
// whether the whole pattern matched.
static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) {
  if (!Op->hasOneUse())
    return;

  IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op);
  if (!II)
    return;
  if (II->getIntrinsicID() != Intrinsic::log2 || !II->hasUnsafeAlgebra())
    return;
  Log2 = II;

  Value *OpLog2Of = II->getArgOperand(0);
  if (!OpLog2Of->hasOneUse())
    return;

  Instruction *I = dyn_cast<Instruction>(OpLog2Of);
  if (!I)
    return;
  if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra())
    return;

  // The constant 0.5 may appear on either side of the multiply.
  if (match(I->getOperand(0), m_SpecificFP(0.5)))
    Y = I->getOperand(1);
  else if (match(I->getOperand(1), m_SpecificFP(0.5)))
    Y = I->getOperand(0);
}
// Whether this call may be rewritten by the alloca-promotion transform.
// Only direct calls to a small whitelist of intrinsics qualify.
static bool isCallPromotable(CallInst *CI) {
  // TODO: We might be able to handle some cases where the callee is a
  // constantexpr bitcast of a function.
  if (!CI->getCalledFunction())
    return false;

  const IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(CI);
  if (!Intr)
    return false;

  switch (Intr->getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset:
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::invariant_group_barrier:
  case Intrinsic::objectsize:
    // These are the memory/marker intrinsics the rewriter understands.
    return true;
  }
}
/// \brief Split sadd.with.overflow into add + sadd.with.overflow to allow
/// analysis and optimization.
///
/// \return A new value representing the non-overflowing add if possible,
/// otherwise return the original value.
Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser,
                                                    const DominatorTree *DT) {
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(IVUser);
  if (!II || II->getIntrinsicID() != Intrinsic::sadd_with_overflow)
    return IVUser;

  // Find a branch guarded by the overflow check.
  // Index 0 of the aggregate result is the add value; index 1 is the
  // overflow bit, which must feed a single conditional branch.
  BranchInst *Branch = 0;
  Instruction *AddVal = 0;
  for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
       UI != E; ++UI) {
    if (ExtractValueInst *ExtractInst = dyn_cast<ExtractValueInst>(*UI)) {
      if (ExtractInst->getNumIndices() != 1)
        continue;
      if (ExtractInst->getIndices()[0] == 0)
        AddVal = ExtractInst;
      else if (ExtractInst->getIndices()[0] == 1 && ExtractInst->hasOneUse())
        Branch = dyn_cast<BranchInst>(ExtractInst->use_back());
    }
  }
  if (!AddVal || !Branch)
    return IVUser;

  // Successor 1 is the no-overflow continuation.  It must have exactly one
  // predecessor so that dominance by ContinueBB implies the overflow check
  // passed.
  BasicBlock *ContinueBB = Branch->getSuccessor(1);
  if (llvm::next(pred_begin(ContinueBB)) != pred_end(ContinueBB))
    return IVUser;

  // Check if all users of the add are provably NSW.
  bool AllNSW = true;
  for (Value::use_iterator UI = AddVal->use_begin(), E = AddVal->use_end();
       UI != E; ++UI) {
    if (Instruction *UseInst = dyn_cast<Instruction>(*UI)) {
      BasicBlock *UseBB = UseInst->getParent();
      // For PHIs, what matters is the block the value flows in from.
      if (PHINode *PHI = dyn_cast<PHINode>(UseInst))
        UseBB = PHI->getIncomingBlock(UI);
      if (!DT->dominates(ContinueBB, UseBB)) {
        AllNSW = false;
        break;
      }
    }
  }
  if (!AllNSW)
    return IVUser;

  // Go for it...
  IRBuilder<> Builder(IVUser);
  Instruction *AddInst = dyn_cast<Instruction>(
      Builder.CreateNSWAdd(II->getOperand(0), II->getOperand(1)));

  // The caller expects the new add to have the same form as the intrinsic. The
  // IV operand position must be the same.
  assert((AddInst->getOpcode() == Instruction::Add &&
          AddInst->getOperand(0) == II->getOperand(0)) &&
         "Bad add instruction created from overflow intrinsic.");

  AddVal->replaceAllUsesWith(AddInst);
  DeadInsts.push_back(AddVal);
  return AddInst;
}
// Dispatch intrinsic calls to their dedicated visitors.  Only bitreverse
// currently has one; all other intrinsics are left untouched.
bool AMDGPUCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
  if (I.getIntrinsicID() == Intrinsic::bitreverse)
    return visitBitreverseIntrinsicInst(I);
  return false;
}
// Try to simplify a call during CodeGenPrepare.  Three cases are handled:
//  1. Inline asm the target can expand or whose memory operands can be sunk.
//  2. llvm.objectsize.*, folded to its conservative constant answer.
//  3. Fortified _chk libcalls with the default "unknown" object size,
//     lowered to the plain libcall.
// Returns true if the call was changed.  Resets CurInstIterator/SunkAddrs
// whenever a transformation may have invalidated them.
bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
  BasicBlock *BB = CI->getParent();

  // Lower inline assembly if we can.
  // If we found an inline asm expession, and if the target knows how to
  // lower it to normal LLVM code, do so now.
  if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
    if (TLI->ExpandInlineAsm(CI)) {
      // Avoid invalidating the iterator.
      CurInstIterator = BB->begin();
      // Avoid processing instructions out of order, which could cause
      // reuse before a value is defined.
      SunkAddrs.clear();
      return true;
    }
    // Sink address computing for memory operands into the block.
    if (OptimizeInlineAsmInst(CI))
      return true;
  }

  // Lower all uses of llvm.objectsize.*
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
    // Arg 1 selects min (return 0) vs max (return -1) when size is unknown.
    bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
    Type *ReturnTy = CI->getType();
    Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);

    // Substituting this can cause recursive simplifications, which can
    // invalidate our iterator.  Use a WeakVH to hold onto it in case this
    // happens.
    WeakVH IterHandle(CurInstIterator);

    ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0,
                              TLInfo, ModifiedDT ? 0 : DT);

    // If the iterator instruction was recursively deleted, start over at the
    // start of the block.
    if (IterHandle != CurInstIterator) {
      CurInstIterator = BB->begin();
      SunkAddrs.clear();
    }
    return true;
  }

  // From here on out we're working with named functions.
  if (CI->getCalledFunction() == 0) return false;

  // We'll need TargetData from here on out.
  const TargetData *TD = TLI ? TLI->getTargetData() : 0;
  if (!TD) return false;

  // Lower all default uses of _chk calls.  This is very similar
  // to what InstCombineCalls does, but here we are only lowering calls
  // that have the default "don't know" as the objectsize.  Anything else
  // should be left alone.
  CodeGenPrepareFortifiedLibCalls Simplifier;
  return Simplifier.fold(CI, TD);
}
// Model the points-to effects of an intrinsic call for the Dyck alias
// analysis.  Handles the variable-argument intrinsics and the standard
// C-library memory intrinsics; every other intrinsic is a no-op here.
void AAAnalyzer::handle_instrinsic(Instruction *inst) {
  // Use LLVM's checked cast rather than a C-style cast: callers must only
  // pass intrinsic calls here, and cast<> asserts that in debug builds.
  IntrinsicInst *call = cast<IntrinsicInst>(inst);
  switch (call->getIntrinsicID()) {
  // Variable Argument Handling Intrinsics
  case Intrinsic::vastart:
  {
    Value *va_list_ptr = call->getArgOperand(0);
    wrapValue(va_list_ptr);
  }
    break;
  case Intrinsic::vaend:
  {
  }
    break;
  case Intrinsic::vacopy: // the same with memmove/memcpy
  //Standard C Library Intrinsics
  case Intrinsic::memmove:
  case Intrinsic::memcpy:
  {
    // Per the LLVM intrinsic signatures, operand 0 is the destination and
    // operand 1 the source (the locals were previously labeled the other
    // way around; makeAlias is symmetric, so the result is unchanged).
    Value *dst_ptr = call->getArgOperand(0);
    Value *src_ptr = call->getArgOperand(1);
    DyckVertex *dst_ptr_ver = wrapValue(dst_ptr);
    DyckVertex *src_ptr_ver = wrapValue(src_ptr);
    // The copy makes the two pointees alias.
    DyckVertex *dst_ver = addPtrTo(dst_ptr_ver, NULL);
    DyckVertex *src_ver = addPtrTo(src_ptr_ver, NULL);
    makeAlias(dst_ver, src_ver);
  }
    break;
  case Intrinsic::memset:
  {
    // Operand 0 is the pointer being filled, operand 1 the byte value.
    Value *ptr = call->getArgOperand(0);
    Value *val = call->getArgOperand(1);
    addPtrTo(wrapValue(ptr), wrapValue(val));
  }
    break;
  /// @todo other C lib intrinsics

  //Accurate Garbage Collection Intrinsics
  //Code Generator Intrinsics
  //Bit Manipulation Intrinsics
  //Exception Handling Intrinsics
  //Trampoline Intrinsics
  //Memory Use Markers
  //General Intrinsics
  //Arithmetic with Overflow Intrinsics
  //Specialised Arithmetic Intrinsics
  //Half Precision Floating Point Intrinsics
  //Debugger Intrinsics
  default:
    break;
  }
}
/// getStoredPointerOperand - Return the pointer that is being written to. static Value *getStoredPointerOperand(Instruction *I) { if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand(); if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) return MI->getDest(); IntrinsicInst *II = cast<IntrinsicInst>(I); switch (II->getIntrinsicID()) { default: llvm_unreachable("Unexpected intrinsic!"); case Intrinsic::init_trampoline: return II->getArgOperand(0); } }
/// isShortenable - Returns true if this instruction can be safely shortened in /// length. static bool isShortenable(Instruction *I) { // Don't shorten stores for now if (isa<StoreInst>(I)) return false; IntrinsicInst *II = cast<IntrinsicInst>(I); switch (II->getIntrinsicID()) { default: return false; case Intrinsic::memset: case Intrinsic::memcpy: // Do shorten memory intrinsics. return true; } }
// Determine whether the lifetime markers of the alloca underlying Addr can
// safely be shrink-wrapped into the extracted region: no block outside the
// region may touch that alloca.  Loads/stores through a global constant or
// through a *different* local alloca are harmless; lifetime markers outside
// the region are ignored; anything else with side effects is conservatively
// rejected.  Returns true when shrink-wrapping is legal.
bool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers(
    Instruction *Addr) const {
  AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets());
  Function *Func = (*Blocks.begin())->getParent();
  for (BasicBlock &BB : *Func) {
    // Only blocks outside the extraction region matter.
    if (Blocks.count(&BB))
      continue;
    for (Instruction &II : BB) {
      if (isa<DbgInfoIntrinsic>(II))
        continue;

      unsigned Opcode = II.getOpcode();
      Value *MemAddr = nullptr;
      switch (Opcode) {
      case Instruction::Store:
      case Instruction::Load: {
        if (Opcode == Instruction::Store) {
          StoreInst *SI = cast<StoreInst>(&II);
          MemAddr = SI->getPointerOperand();
        } else {
          LoadInst *LI = cast<LoadInst>(&II);
          MemAddr = LI->getPointerOperand();
        }
        // Global variable can not be aliased with locals.
        // (isa<> rather than dyn_cast<>: the result was never used.)
        if (isa<Constant>(MemAddr))
          break;
        Value *Base = MemAddr->stripInBoundsConstantOffsets();
        // Accesses whose base is not an alloca, or is the alloca we want to
        // shrink-wrap, make the transform illegal.
        if (!isa<AllocaInst>(Base) || Base == AI)
          return false;
        break;
      }
      default: {
        IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II);
        if (IntrInst) {
          // Lifetime markers outside the region don't interfere.
          if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
              IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
            break;
          return false;
        }
        // Treat all the other cases conservatively if it has side effects.
        if (II.mayHaveSideEffects())
          return false;
      }
      }
    }
  }
  return true;
}
// TODO: Ideally we should share Inliner's InlineCost Analysis code. // For now use a simplified version. The returned 'InlineCost' will be used // to esimate the size cost as well as runtime cost of the BB. int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) { int InlineCost = 0; const DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { if (isa<DbgInfoIntrinsic>(I)) continue; switch (I->getOpcode()) { case Instruction::BitCast: case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::Alloca: continue; case Instruction::GetElementPtr: if (cast<GetElementPtrInst>(I)->hasAllZeroIndices()) continue; default: break; } IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I); if (IntrInst) { if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start || IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) continue; } if (CallInst *CI = dyn_cast<CallInst>(I)) { InlineCost += getCallsiteCost(CallSite(CI), DL); continue; } if (InvokeInst *II = dyn_cast<InvokeInst>(I)) { InlineCost += getCallsiteCost(CallSite(II), DL); continue; } if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) { InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost; continue; } InlineCost += InlineConstants::InstrCost; } return InlineCost; }
// Returns true when CI is one of the memory/marker intrinsics the pointer
// promotion rewriting knows how to handle.
static bool isCallPromotable(CallInst *CI) {
  IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(CI);
  if (!Intr)
    return false;

  switch (Intr->getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset:
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::invariant_group_barrier:
  case Intrinsic::objectsize:
    return true;
  }
}
// Expand a masked load/store/gather/scatter intrinsic into explicit scalar
// control flow when the target cannot lower it natively (per TTI legality
// queries).  ModifiedDT is set when new control flow was introduced.
// Returns true if CI was replaced by a scalarized sequence.
bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
                                                bool &ModifiedDT) {
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (II) {
    switch (II->getIntrinsicID()) {
    default:
      break;
    case Intrinsic::masked_load:
      // Scalarize unsupported vector masked load
      if (!TTI->isLegalMaskedLoad(CI->getType())) {
        scalarizeMaskedLoad(CI);
        ModifiedDT = true;
        return true;
      }
      return false;
    case Intrinsic::masked_store:
      // Legality depends on the type of the value being stored (arg 0).
      if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) {
        scalarizeMaskedStore(CI);
        ModifiedDT = true;
        return true;
      }
      return false;
    case Intrinsic::masked_gather:
      if (!TTI->isLegalMaskedGather(CI->getType())) {
        scalarizeMaskedGather(CI);
        ModifiedDT = true;
        return true;
      }
      return false;
    case Intrinsic::masked_scatter:
      if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) {
        scalarizeMaskedScatter(CI);
        ModifiedDT = true;
        return true;
      }
      return false;
    }
  }
  return false;
}
/// isRemovable - If the value of this instruction and the memory it writes to /// is unused, may we delete this instruction? static bool isRemovable(Instruction *I) { // Don't remove volatile/atomic stores. if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->isUnordered(); IntrinsicInst *II = cast<IntrinsicInst>(I); switch (II->getIntrinsicID()) { default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate"); case Intrinsic::lifetime_end: // Never remove dead lifetime_end's, e.g. because it is followed by a // free. return false; case Intrinsic::init_trampoline: // Always safe to remove init_trampoline. return true; case Intrinsic::memset: case Intrinsic::memmove: case Intrinsic::memcpy: // Don't remove volatile memory intrinsics. return !cast<MemIntrinsic>(II)->isVolatile(); } }
/// getLocForWrite - Return a Location stored to by the specified instruction. /// If isRemovable returns true, this function and getLocForRead completely /// describe the memory operations for this instruction. static AliasAnalysis::Location getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { const DataLayout *DL = AA.getDataLayout(); if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) return AA.getLocation(SI); if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) { // memcpy/memmove/memset. AliasAnalysis::Location Loc = AA.getLocationForDest(MI); // If we don't have target data around, an unknown size in Location means // that we should use the size of the pointee type. This isn't valid for // memset/memcpy, which writes more than an i8. if (Loc.Size == AliasAnalysis::UnknownSize && DL == nullptr) return AliasAnalysis::Location(); return Loc; } IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst); if (!II) return AliasAnalysis::Location(); switch (II->getIntrinsicID()) { default: return AliasAnalysis::Location(); // Unhandled intrinsic. case Intrinsic::init_trampoline: // If we don't have target data around, an unknown size in Location means // that we should use the size of the pointee type. This isn't valid for // init.trampoline, which writes more than an i8. if (!DL) return AliasAnalysis::Location(); // FIXME: We don't know the size of the trampoline, so we can't really // handle it here. return AliasAnalysis::Location(II->getArgOperand(0)); case Intrinsic::lifetime_end: { uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(); return AliasAnalysis::Location(II->getArgOperand(1), Len); } } }
// Try to promote a static, non-array alloca.  First attempts vectorization;
// failing that, moves the alloca into LDS (local memory) by creating a
// per-workgroup global array indexed by the flattened workitem ID, then
// rewrites every collected pointer use (including memory intrinsics) to the
// new address space.
void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
  // Array allocations are probably not worth handling, since an allocation of
  // the array type is the canonical form.
  if (!I.isStaticAlloca() || I.isArrayAllocation())
    return;

  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector
  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I))
    return;

  DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");

  const Function &ContainingFunction = *I.getParent()->getParent();

  // FIXME: We should also try to get this value from the reqd_work_group_size
  // function attribute if it is available.
  unsigned WorkGroupSize = AMDGPU::getMaximumWorkGroupSize(ContainingFunction);

  // Each workitem needs its own copy, so the LDS footprint is the per-item
  // size times the maximum workgroup size.
  int AllocaSize =
      WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy);

  if (AllocaSize > LocalMemAvailable) {
    DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
    return;
  }

  std::vector<Value*> WorkList;

  if (!collectUsesWithPtrTypes(&I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");
  LocalMemAvailable -= AllocaSize;

  Function *F = I.getParent()->getParent();

  // One array slot per workitem in the workgroup.
  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage, UndefValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(), nullptr,
      GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
  GV->setUnnamedAddr(true);
  GV->setAlignment(I.getAlignment());

  // Flattened workitem id: (TIdX * TCntY * TCntZ) + (TIdY * TCntZ) + TIdZ.
  Value *TCntY, *TCntZ;

  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
  Value *TIdX = getWorkitemID(Builder, 0);
  Value *TIdY = getWorkitemID(Builder, 1);
  Value *TIdZ = getWorkitemID(Builder, 2);

  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  Value *Indices[] = {
    Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
    TID
  };

  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  // Rewrite every recorded pointer use to the LDS address space.
  for (Value *V : WorkList) {
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);

      // The operand's value should be corrected on its own.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);
      continue;
    }

    IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
    if (!Intr) {
      // FIXME: What is this for? It doesn't make sense to promote arbitrary
      // function calls. If the call is to a defined function that can also be
      // promoted, we should be able to do this once that function is also
      // rewritten.
      std::vector<Type*> ArgTypes;
      for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
                                ArgIdx != ArgEnd; ++ArgIdx) {
        ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
      }
      Function *F = Call->getCalledFunction();
      FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
                                                F->isVarArg());
      Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(),
                                             NewType, F->getAttributes());
      Function *NewF = cast<Function>(C);
      Call->setCalledFunction(NewF);
      continue;
    }

    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy: {
      // Re-emit with the mutated (LDS) pointer types.
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
                           MemCpy->getLength(), MemCpy->getAlignment(),
                           MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
      Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getRawSource(),
                            MemMove->getLength(), MemMove->getAlignment(),
                            MemMove->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getAlignment(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::invariant_group_barrier:
      Intr->eraseFromParent();
      // FIXME: I think the invariant marker should still theoretically apply,
      // but the intrinsics need to be changed to accept pointers with any
      // address space.
      continue;
    case Intrinsic::objectsize: {
      // Re-declare the intrinsic for the LDS pointer type.
      Value *Src = Intr->getOperand(0);
      Type *SrcTy = Src->getType()->getPointerElementType();
      Function *ObjectSize = Intrinsic::getDeclaration(Mod,
        Intrinsic::objectsize,
        { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) }
      );

      CallInst *NewCall
        = Builder.CreateCall(ObjectSize, { Src, Intr->getOperand(1) });
      Intr->replaceAllUsesWith(NewCall);
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->dump();
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
}
// FIXME: Should try to pick the most likely to be profitable allocas first.
//
// Try to promote a static, non-array alloca: first to a vector, then — for
// kernel calling conventions with sufficient LDS — into local memory via a
// per-workgroup global array indexed by the flattened workitem ID.  All
// collected pointer uses (compares, selects, PHIs, memory intrinsics, ...)
// are rewritten to the LDS address space.  Returns true if the alloca was
// promoted either way.
bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
  // Array allocations are probably not worth handling, since an allocation of
  // the array type is the canonical form.
  if (!I.isStaticAlloca() || I.isArrayAllocation())
    return false;

  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector
  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I, AS))
    return true; // Promoted to vector.

  const Function &ContainingFunction = *I.getParent()->getParent();
  CallingConv::ID CC = ContainingFunction.getCallingConv();

  // Don't promote the alloca to LDS for shader calling conventions as the work
  // item ID intrinsics are not supported for these calling conventions.
  // Furthermore not all LDS is available for some of the stages.
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    break;
  default:
    DEBUG(dbgs() << " promote alloca to LDS not supported with calling convention.\n");
    return false;
  }

  // Not likely to have sufficient local memory for promotion.
  if (!SufficientLDS)
    return false;

  const AMDGPUSubtarget &ST =
    TM->getSubtarget<AMDGPUSubtarget>(ContainingFunction);
  unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;

  const DataLayout &DL = Mod->getDataLayout();

  unsigned Align = I.getAlignment();
  if (Align == 0)
    Align = DL.getABITypeAlignment(I.getAllocatedType());

  // FIXME: This computed padding is likely wrong since it depends on inverse
  // usage order.
  //
  // FIXME: It is also possible that if we're allowed to use all of the memory
  // we could end up using more than the maximum due to alignment padding.

  // Account for alignment padding on top of the LDS already committed.
  uint32_t NewSize = alignTo(CurrentLocalMemUsage, Align);
  uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy);
  NewSize += AllocSize;

  if (NewSize > LocalMemLimit) {
    DEBUG(dbgs() << " " << AllocSize
          << " bytes of local memory not available to promote\n");
    return false;
  }

  CurrentLocalMemUsage = NewSize;

  std::vector<Value*> WorkList;

  if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return false;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");

  Function *F = I.getParent()->getParent();

  // One array slot per workitem in the workgroup.
  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage,
      UndefValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(),
      nullptr,
      GlobalVariable::NotThreadLocal,
      AS.LOCAL_ADDRESS);
  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  GV->setAlignment(I.getAlignment());

  // Flattened workitem id: (TIdX * TCntY * TCntZ) + (TIdY * TCntZ) + TIdZ.
  Value *TCntY, *TCntZ;

  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
  Value *TIdX = getWorkitemID(Builder, 0);
  Value *TIdY = getWorkitemID(Builder, 1);
  Value *TIdZ = getWorkitemID(Builder, 2);

  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  Value *Indices[] = {
    Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
    TID
  };

  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  // Rewrite every recorded pointer use to the LDS address space.
  for (Value *V : WorkList) {
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
        // Retype any null-pointer constants compared against the alloca.
        Value *Src0 = CI->getOperand(0);
        Type *EltTy = Src0->getType()->getPointerElementType();
        PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);

        if (isa<ConstantPointerNull>(CI->getOperand(0)))
          CI->setOperand(0, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(CI->getOperand(1)))
          CI->setOperand(1, ConstantPointerNull::get(NewTy));

        continue;
      }

      // The operand's value should be corrected on its own and we don't want to
      // touch the users.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);

      // Adjust the types of any constant operands.
      if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
        if (isa<ConstantPointerNull>(SI->getOperand(1)))
          SI->setOperand(1, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(SI->getOperand(2)))
          SI->setOperand(2, ConstantPointerNull::get(NewTy));
      } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
        for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
          if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
            Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy));
        }
      }

      continue;
    }

    // collectUsesWithPtrTypes only admits intrinsic calls, so this cast is
    // safe.
    IntrinsicInst *Intr = cast<IntrinsicInst>(Call);
    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy: {
      // Re-emit with the mutated (LDS) pointer types.
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getDestAlignment(),
                           MemCpy->getRawSource(), MemCpy->getSourceAlignment(),
                           MemCpy->getLength(), MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
      Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getDestAlignment(),
                            MemMove->getRawSource(), MemMove->getSourceAlignment(),
                            MemMove->getLength(), MemMove->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getDestAlignment(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::invariant_group_barrier:
      Intr->eraseFromParent();
      // FIXME: I think the invariant marker should still theoretically apply,
      // but the intrinsics need to be changed to accept pointers with any
      // address space.
      continue;
    case Intrinsic::objectsize: {
      // Re-declare the intrinsic for the LDS pointer type.
      Value *Src = Intr->getOperand(0);
      Type *SrcTy = Src->getType()->getPointerElementType();
      Function *ObjectSize = Intrinsic::getDeclaration(Mod,
        Intrinsic::objectsize,
        { Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) }
      );

      CallInst *NewCall = Builder.CreateCall(
          ObjectSize, {Src, Intr->getOperand(1), Intr->getOperand(2)});
      Intr->replaceAllUsesWith(NewCall);
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->print(errs());
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
  return true;
}
// Promote a single alloca either to a vector (preferred) or, failing that,
// to a workgroup-shared LDS (local address space) array indexed by the
// flat thread ID.  Rewrites the alloca's uses (collected by
// collectUsesWithPtrTypes) to the new address space, remapping pointer
// types, calls, and memory intrinsics.
void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector
  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I))
    return;

  DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");

  // FIXME: This is the maximum work group size.  We should try to get
  // value from the reqd_work_group_size function attribute if it is
  // available.
  unsigned WorkGroupSize = 256;
  int AllocaSize = WorkGroupSize *
      Mod->getDataLayout()->getTypeAllocSize(AllocaTy);

  if (AllocaSize > LocalMemAvailable) {
    DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
    return;
  }

  std::vector<Value*> WorkList;

  if (!collectUsesWithPtrTypes(&I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");
  LocalMemAvailable -= AllocaSize;

  // One element per thread in the (maximum-size) workgroup.  Use
  // WorkGroupSize rather than repeating the literal 256 so the array size
  // stays in sync with the size computation above.
  GlobalVariable *GV = new GlobalVariable(
      *Mod, ArrayType::get(I.getAllocatedType(), WorkGroupSize), false,
      GlobalValue::ExternalLinkage, 0, I.getName(), 0,
      GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);

  FunctionType *FTy = FunctionType::get(
      Type::getInt32Ty(Mod->getContext()), false);
  AttributeSet AttrSet;
  // AttributeSet is immutable: addAttribute returns a NEW set.  The
  // original code discarded the result, so ReadNone was never applied to
  // the declarations below; keep the returned set instead.
  AttrSet = AttrSet.addAttribute(Mod->getContext(), 0, Attribute::ReadNone);

  // Workgroup-size and thread-id query intrinsics used to compute the
  // per-thread index into the LDS array.
  Value *ReadLocalSizeY = Mod->getOrInsertFunction(
      "llvm.r600.read.local.size.y", FTy, AttrSet);
  Value *ReadLocalSizeZ = Mod->getOrInsertFunction(
      "llvm.r600.read.local.size.z", FTy, AttrSet);
  Value *ReadTIDIGX = Mod->getOrInsertFunction(
      "llvm.r600.read.tidig.x", FTy, AttrSet);
  Value *ReadTIDIGY = Mod->getOrInsertFunction(
      "llvm.r600.read.tidig.y", FTy, AttrSet);
  Value *ReadTIDIGZ = Mod->getOrInsertFunction(
      "llvm.r600.read.tidig.z", FTy, AttrSet);

  Value *TCntY = Builder.CreateCall(ReadLocalSizeY);
  Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ);
  Value *TIdX = Builder.CreateCall(ReadTIDIGX);
  Value *TIdY = Builder.CreateCall(ReadTIDIGY);
  Value *TIdZ = Builder.CreateCall(ReadTIDIGZ);

  // Flat thread ID = tid.x * (cnt.y * cnt.z) + tid.y * cnt.z + tid.z
  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  std::vector<Value*> Indices;
  Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext())));
  Indices.push_back(TID);

  // Replace the alloca with this thread's slot of the LDS array.
  Value *Offset = Builder.CreateGEP(GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  // Fix up every collected pointer-typed use for the new address space.
  for (std::vector<Value*>::iterator i = WorkList.begin(),
                                     e = WorkList.end(); i != e; ++i) {
    Value *V = *i;
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);

      // The operand's value should be corrected on its own.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);
      continue;
    }

    IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
    if (!Intr) {
      // Non-intrinsic call: redirect to a ".local" clone of the callee
      // whose signature matches the rewritten argument types.
      std::vector<Type*> ArgTypes;
      for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
                                ArgIdx != ArgEnd; ++ArgIdx) {
        ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
      }
      Function *F = Call->getCalledFunction();
      FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
                                                F->isVarArg());
      Constant *C = Mod->getOrInsertFunction(StringRef(F->getName().str() + ".local"), NewType,
                                             F->getAttributes());
      Function *NewF = cast<Function>(C);
      Call->setCalledFunction(NewF);
      continue;
    }

    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy: {
      // Recreate so the intrinsic's pointer-type overloads match the new
      // address space.
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
                           MemCpy->getLength(), MemCpy->getAlignment(),
                           MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getAlignment(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->dump();
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
}
// Add def-use (data-dependence) edges for a call to an LLVM intrinsic.
// Memory-touching intrinsics (memcpy/memmove/memset/va_start) get
// dependence edges on the pointers they write (dest) and read (src);
// side-effect-free intrinsics are left to the ordinary operand-based
// edge construction; anything unhandled that may access memory falls
// back to handleUndefinedCall.
void LLVMDefUseAnalysis::handleIntrinsicCall(LLVMNode *callNode, CallInst *CI) {
    static std::set<Instruction *> warnings;
    IntrinsicInst *I = cast<IntrinsicInst>(CI);
    // Initialize both pointers: the original left `dest` uninitialized, so
    // any future case that forgot to set it would make assert(dest) read an
    // indeterminate value instead of failing deterministically.
    Value *dest = nullptr, *src = nullptr;

    switch (I->getIntrinsicID()) {
        case Intrinsic::memmove:
        case Intrinsic::memcpy:
            src = I->getOperand(1);
            // fall-through
        case Intrinsic::memset:
        case Intrinsic::vastart:
            dest = I->getOperand(0);
            break;
        case Intrinsic::vaend:
        case Intrinsic::lifetime_start:
        case Intrinsic::lifetime_end:
        case Intrinsic::trap:
            // nothing to be done here
            return;
        case Intrinsic::bswap:
        case Intrinsic::prefetch:
        case Intrinsic::objectsize:
        case Intrinsic::sadd_with_overflow:
        case Intrinsic::uadd_with_overflow:
        case Intrinsic::ssub_with_overflow:
        case Intrinsic::usub_with_overflow:
        case Intrinsic::smul_with_overflow:
        case Intrinsic::umul_with_overflow:
            // nothing to be done, direct def-use edges
            // will be added later
            assert(I->getCalledFunction()->doesNotAccessMemory());
            return;
        case Intrinsic::stacksave:
        case Intrinsic::stackrestore:
            // warn only once per call site
            if (warnings.insert(CI).second)
                llvmutils::printerr("WARN: stack save/restore not implemented", CI);
            return;
        default:
            llvmutils::printerr("WARNING: unhandled intrinsic call", I);
            // if it does not access memory, we can just add
            // direct def-use edges
            if (I->getCalledFunction()->doesNotAccessMemory())
                return;
            assert (0 && "Unhandled intrinsic that accesses memory");
            // for release builds, do the best we can here
            handleUndefinedCall(callNode, CI);
            return;
    }

    // we must have dest set
    assert(dest);

    // these functions touch the memory of the pointers
    addDataDependence(callNode, CI, dest, Offset::UNKNOWN /* FIXME */);

    if (src)
        addDataDependence(callNode, CI, src, Offset::UNKNOWN /* FIXME */);
}
// Walk every instruction of 'bb' and, for each GLA intrinsic call the back
// end asks to have decomposed (backEnd->decomposeIntrinsic(...)), emit an
// equivalent sequence of simpler instructions/intrinsics, then replace and
// erase the original call.  When a decomposition itself emits an intrinsic
// that may in turn need decomposing (clamp -> min/max, length -> dot, ...),
// the iterator is reset so the next loop iteration revisits the new code.
// Sets the member flag 'changed' whenever anything was rewritten.
void DecomposeInsts::decomposeIntrinsics(BasicBlock* bb)
{
    IRBuilder<> builder(module->getContext());

    for (BasicBlock::iterator instI = bb->begin(), instE = bb->end(); instI != instE; /* empty */) {
        Instruction* inst = instI;

        // Note this increment of instI will skip decompositions of the code
        // inserted to decompose.  E.g., if length -> dot, and dot is also to
        // be decomposed, then the decomposition of dot will be skipped
        // unless instI is reset.
        ++instI;

        IntrinsicInst* intrinsic = dyn_cast<IntrinsicInst>(inst);
        if (! intrinsic)
            continue;

        // Useful preamble for most case
        llvm::Value* arg0 = 0;
        llvm::Value* arg1 = 0;
        llvm::Value* arg2 = 0;
        if (inst->getNumOperands() > 0)
            arg0 = inst->getOperand(0);
        if (inst->getNumOperands() > 1)
            arg1 = inst->getOperand(1);
        if (inst->getNumOperands() > 2)
            arg2 = inst->getOperand(2);
        llvm::Value* newInst = 0;
        // Scratch type arrays reused by the cases below when declaring helper
        // intrinsics: slot 0 is conventionally the return type, later slots
        // the operand types (see the gla_fClamp comment).
        Type* instTypes[] = { inst->getType(), inst->getType(), inst->getType(), inst->getType() };
        // NOTE(review): argTypes dereferences arg0 unconditionally; an
        // intrinsic call with zero operands would crash here -- presumably
        // every intrinsic reaching this point has at least one operand.
        // TODO: confirm.
        Type* argTypes[] = { arg0->getType(), arg0->getType(), arg0->getType(), arg0->getType() };
        // instI was already advanced, so new code is inserted immediately
        // after 'inst' (before the next original instruction).
        builder.SetInsertPoint(instI);

        switch (intrinsic->getIntrinsicID()) {
        case Intrinsic::gla_fRadians:
            {
                // always decompose
                // arg0 -> arg0 * pi / 180
                const double pi_over_180 = 0.01745329251994329576923690768489;
                newInst = MultiplyByConstant(builder, arg0, pi_over_180);
                break;
            }
        case Intrinsic::gla_fDegrees:
            {
                // always decompose
                // arg0 -> arg0 * 180 / pi
                const double pi_into_180 = 57.295779513082320876798154814105;
                newInst = MultiplyByConstant(builder, arg0, pi_into_180);
                break;
            }
        case Intrinsic::gla_fMin:
            if (backEnd->decomposeIntrinsic(EDiMin)) {
                //
                // min(a,b) = select (a < b), a, b
                //
                llvm::Value* smeared = Smear(builder, module, arg1, arg0);
                newInst = builder.CreateFCmpOLT(arg0, smeared);
                newInst = builder.CreateSelect(newInst, arg0, smeared);
            }
            break;
        case Intrinsic::gla_fMax:
            if (backEnd->decomposeIntrinsic(EDiMax)) {
                //
                // max(a,b) = select (a > b), a, b
                //
                llvm::Value* smeared = Smear(builder, module, arg1, arg0);
                newInst = builder.CreateFCmpOGT(arg0, smeared);
                newInst = builder.CreateSelect(newInst, arg0, smeared);
            }
            break;
        case Intrinsic::gla_fClamp:
            if (backEnd->decomposeIntrinsic(EDiClamp)) {
                //
                // Clamp(x, minVal, maxVal) is defined to be min(max(x, minVal), maxVal).
                //
                // The 2nd and 3rd arguments match each other, but not necessarily
                // the 1st argument.  In the decomposition, this difference matches
                // min/max's difference in their 1st and 2nd arguments.
                //
                argTypes[2] = arg1->getType();   // argTypes[*] start at 0 for the return value, arg* start at 0 for operand 0
                Function* max = Intrinsic::getDeclaration(module, Intrinsic::gla_fMax, makeArrayRef(argTypes, 3));
                Function* min = Intrinsic::getDeclaration(module, Intrinsic::gla_fMin, makeArrayRef(argTypes, 3));
                newInst = builder.CreateCall2(max, arg0, arg1);
                newInst = builder.CreateCall2(min, newInst, arg2);

                // Make next iteration revisit this decomposition, in case min
                // or max are decomposed.
                instI = inst;
                ++instI;
            }
            break;
        case Intrinsic::gla_fAsin:
            if (backEnd->decomposeIntrinsic(EDiAsin)) {
                UnsupportedFunctionality("decomposition of gla_fAsin");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fAcos:
            if (backEnd->decomposeIntrinsic(EDiAcos)) {
                // TODO: functionality: Do we need to handle domain errors? (E.g., bad input value)
                //
                // Polynomial approximation:
                // acos(x) ~= sqrt(1-x)*(a + x*(b + x*(c + x*d)))
                // where a = 1.57079632679
                //       b = -0.213300989
                //       c = 0.077980478
                //       d = -0.0216409
                //
                double a = 1.57079632679;
                double b = -0.213300989;
                double c = 0.077980478;
                double d = -0.0216409;

                // polynomial part, going right to left...
                llvm::Value* poly;
                poly = MultiplyByConstant(builder, arg0, d);
                poly = AddWithConstant(builder, poly, c);
                poly = builder.CreateFMul(arg0, poly);
                poly = AddWithConstant(builder, poly, b);
                poly = builder.CreateFMul(arg0, poly);
                poly = AddWithConstant(builder, poly, a);

                // sqrt part
                Function* sqrt = Intrinsic::getDeclaration(module, Intrinsic::gla_fSqrt, makeArrayRef(argTypes, 2));
                newInst = builder.CreateFNeg(arg0);
                newInst = AddWithConstant(builder, newInst, 1.0);
                newInst = builder.CreateCall(sqrt, newInst);
                newInst = builder.CreateFMul(newInst, poly);
            }
            break;
        case Intrinsic::gla_fAtan:
            if (backEnd->decomposeIntrinsic(EDiAtan)) {
                UnsupportedFunctionality("decomposition of gla_fAtan");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fAtan2:
            if (backEnd->decomposeIntrinsic(EDiAtan2)) {
                UnsupportedFunctionality("decomposition of gla_fAtan2");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fCosh:
            if (backEnd->decomposeIntrinsic(EDiCosh)) {
                UnsupportedFunctionality("decomposition of gla_fCosh");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fSinh:
            if (backEnd->decomposeIntrinsic(EDiSinh)) {
                UnsupportedFunctionality("decomposition of gla_fSinh");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fTanh:
            if (backEnd->decomposeIntrinsic(EDiTanh)) {
                UnsupportedFunctionality("decomposition of gla_fTanh");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fAcosh:
            if (backEnd->decomposeIntrinsic(EDiACosh)) {
                UnsupportedFunctionality("decomposition of gla_fACosh");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fAsinh:
            if (backEnd->decomposeIntrinsic(EDiASinh)) {
                UnsupportedFunctionality("decomposition of gla_fASinh");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fAtanh:
            if (backEnd->decomposeIntrinsic(EDiATanh)) {
                UnsupportedFunctionality("decomposition of gla_fATanh");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fPowi:
            if (backEnd->decomposeIntrinsic(EDiPowi)) {
                UnsupportedFunctionality("decomposition of gla_fPowi");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fExp10:
        case Intrinsic::gla_fExp:
            if ((intrinsic->getIntrinsicID() == Intrinsic::gla_fExp10 && backEnd->decomposeIntrinsic(EDiExp10)) ||
                (intrinsic->getIntrinsicID() == Intrinsic::gla_fExp && backEnd->decomposeIntrinsic(EDiExp))) {
                // Rewrite in terms of exp2:
                // 10^X = 2^(X /(log base 10 of 2))
                // -> 10^X = 2^(X * 3.3219280948873623478703194294894)
                //
                // e^X = 2^(X /(log base e of 2))
                // -> e^X = 2^(X * 1.4426950408889634073599246810019)

                //const double inv_log10_e = 2.3025850929940456840179914546844;  // 10 -> e, in case it comes up
                const double inv_log10_2 = 3.3219280948873623478703194294894;  // 10 -> 2
                const double inv_loge_2 = 1.4426950408889634073599246810019;   // e -> 2

                double multiplier;
                if (intrinsic->getIntrinsicID() == Intrinsic::gla_fExp10)
                    multiplier = inv_log10_2;
                else
                    multiplier = inv_loge_2;

                newInst = MultiplyByConstant(builder, arg0, multiplier);
                Function* exp = Intrinsic::getDeclaration(module, Intrinsic::gla_fExp2, makeArrayRef(argTypes, 2));
                newInst = builder.CreateCall(exp, newInst);
            }
            break;
        case Intrinsic::gla_fLog10:
        case Intrinsic::gla_fLog:
            if ((intrinsic->getIntrinsicID() == Intrinsic::gla_fLog10 && backEnd->decomposeIntrinsic(EDiLog10)) ||
                (intrinsic->getIntrinsicID() == Intrinsic::gla_fLog && backEnd->decomposeIntrinsic(EDiLog))) {
                // Rewrite in terms of log2:
                // log base 10 of X = (log base 10 of 2) * (log base 2 of X)
                // -> log base 10 of X = 0.30102999566398119521373889472449 * (log base 2 of X)
                //
                // log base e of X = (log base e of 2) * (log base 2 of X)
                // -> log base e of X = 0.69314718055994530941723212145818 * (log base 2 of X)

                //const double log10_e = 0.43429448190325182765112891891661;  // 10 -> e, in case it comes up
                const double log10_2 = 0.30102999566398119521373889472449;  // 10 -> 2
                const double loge_2 = 0.69314718055994530941723212145818;   // e -> 2

                double multiplier;
                if (intrinsic->getIntrinsicID() == Intrinsic::gla_fLog10)
                    multiplier = log10_2;
                else
                    multiplier = loge_2;

                Function* log = Intrinsic::getDeclaration(module, Intrinsic::gla_fLog2, makeArrayRef(argTypes, 2));
                newInst = builder.CreateCall(log, arg0);
                newInst = MultiplyByConstant(builder, newInst, multiplier);
            }
            break;
        case Intrinsic::gla_fInverseSqrt:
            if (backEnd->decomposeIntrinsic(EDiInverseSqrt)) {
                // 1.0 / sqrt(x)
                Function* sqrt = Intrinsic::getDeclaration(module, Intrinsic::gla_fSqrt, makeArrayRef(argTypes, 2));
                newInst = builder.CreateCall(sqrt, arg0);
                newInst = builder.CreateFDiv(MakeFloatConstant(module->getContext(), 1.0), newInst);
            }
            break;
        case Intrinsic::gla_fFraction:
            if (backEnd->decomposeIntrinsic(EDiFraction)) {
                UnsupportedFunctionality("decomposition of gla_fFraction");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fSign:
            if (backEnd->decomposeIntrinsic(EDiSign)) {
                UnsupportedFunctionality("decomposition of gla_fSign");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fModF:
            if (backEnd->decomposeIntrinsic(EDiModF)) {
                UnsupportedFunctionality("decomposition of gla_fModF");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fMix:
            if (backEnd->decomposeIntrinsic(EDiMix)) {
                //
                // genType mix (x, y, a) = x * (1 - a) + y * a
                //
                llvm::Value* t;
                t = builder.CreateFNeg(arg2);
                t = AddWithConstant(builder, t, 1.0);
                t = builder.CreateFMul(arg0, t);
                newInst = builder.CreateFMul(arg1, arg2);
                newInst = builder.CreateFAdd(t, newInst);
            }
            break;
        case Intrinsic::gla_fStep:
            if (backEnd->decomposeIntrinsic(EDiStep)) {
                //
                // step(edge, x) is defined to be 0.0 if x < edge, otherwise 1.0.
                //
                llvm::FCmpInst::Predicate predicate = llvm::FCmpInst::FCMP_OLT;
                llvm::Value* condition = builder.CreateFCmp(predicate, arg1, arg0);
                newInst = builder.CreateSelect(condition,
                                               VectorizeConstant(GetComponentCount(arg1), MakeFloatConstant(module->getContext(), 0.0)),
                                               VectorizeConstant(GetComponentCount(arg1), MakeFloatConstant(module->getContext(), 1.0)));
            }
            break;
        case Intrinsic::gla_fSmoothStep:
            if (backEnd->decomposeIntrinsic(EDiSmoothStep)) {
                //
                // smoothstep (edge0, edge1, x) is defined to be
                //
                //   t = clamp((x - edge0) / (edge1 - edge0), 0, 1)
                //   t * t * (3 - 2 * t)
                //
                // where edge* can be scalar even if x is vector.
                //
                llvm::Value* smeared0 = Smear(builder, module, arg0, arg2);
                llvm::Value* smeared1 = Smear(builder, module, arg1, arg2);
                llvm::Value* numerator = builder.CreateFSub(arg2, smeared0, "numerator");
                llvm::Value* denominator = builder.CreateFSub(smeared1, smeared0, "denominator");
                llvm::Value* quotient = builder.CreateFDiv(numerator, denominator, "quotient");
                llvm::Value* zero = MakeFloatConstant(module->getContext(), 0.0);
                llvm::Value* one = MakeFloatConstant(module->getContext(), 1.0);
                Type* newArgTypes[] = { quotient->getType(), quotient->getType(), zero->getType(), one->getType() };
                Function* clamp = Intrinsic::getDeclaration(module, Intrinsic::gla_fClamp, newArgTypes);
                llvm::Value* t = builder.CreateCall3(clamp, quotient, zero, one);
                newInst = MultiplyByConstant(builder, t, 2.0);
                newInst = SubFromConstant(builder, 3.0, newInst);
                newInst = builder.CreateFMul(t, newInst);
                newInst = builder.CreateFMul(t, newInst);

                // Make next iteration revisit this decomposition, in case clamp is
                // decomposed.
                instI = inst;
                ++instI;
            }
            break;
        case Intrinsic::gla_fIsNan:
            if (backEnd->decomposeIntrinsic(EDiIsNan)) {
                UnsupportedFunctionality("decomposition of gla_fIsNan");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fFma:
            if (backEnd->decomposeIntrinsic(EDiFma)) {
                UnsupportedFunctionality("decomposition of gla_Fma");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fPackUnorm2x16:
            if (backEnd->decomposeIntrinsic(EDiPackUnorm2x16)) {
                UnsupportedFunctionality("decomposition of gla_fPackUnorm2x16");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fPackUnorm4x8:
            if (backEnd->decomposeIntrinsic(EDiPackUnorm4x8)) {
                UnsupportedFunctionality("decomposition of gla_fPackUnorm4x8");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fPackSnorm4x8:
            if (backEnd->decomposeIntrinsic(EDiPackSnorm4x8)) {
                UnsupportedFunctionality("decomposition of gla_fPackSnorm4x8");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fUnpackUnorm2x16:
            if (backEnd->decomposeIntrinsic(EDiUnpackUnorm2x16)) {
                UnsupportedFunctionality("decomposition of gla_fUnpackUnorm2x16");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fUnpackUnorm4x8:
            if (backEnd->decomposeIntrinsic(EDiUnpackUnorm4x8)) {
                UnsupportedFunctionality("decomposition of gla_fUnpackUnorm4x8");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fUnpackSnorm4x8:
            if (backEnd->decomposeIntrinsic(EDiUnpackSnorm4x8)) {
                UnsupportedFunctionality("decomposition of gla_fUnpackSnorm4x8");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fPackDouble2x32:
            if (backEnd->decomposeIntrinsic(EDiPackDouble2x32)) {
                UnsupportedFunctionality("decomposition of gla_fPackDouble2x32");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fUnpackDouble2x32:
            if (backEnd->decomposeIntrinsic(EDiUnpackDouble2x32)) {
                UnsupportedFunctionality("decomposition of gla_fUnpackDouble2x32");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fLength:
            if (backEnd->decomposeIntrinsic(EDiLength)) {
                // length(v) = sqrt(dot(v,v)) for vectors, abs(v) for scalars.
                if (GetComponentCount(arg0) > 1) {
                    Function* dot = GetDotIntrinsic(module, argTypes);
                    newInst = builder.CreateCall2(dot, arg0, arg0);

                    Function* sqrt = Intrinsic::getDeclaration(module, Intrinsic::gla_fSqrt, makeArrayRef(instTypes, 2));
                    newInst = builder.CreateCall(sqrt, newInst);
                } else {
                    Function* abs = Intrinsic::getDeclaration(module, Intrinsic::gla_fAbs, makeArrayRef(instTypes, 2));
                    newInst = builder.CreateCall(abs, arg0);
                }

                // Make next iteration revisit this decomposition, in case dot is
                // decomposed.
                instI = inst;
                ++instI;
            }
            break;
        case Intrinsic::gla_fDistance:
            if (backEnd->decomposeIntrinsic(EDiDistance)) {
                // distance(a,b) = length(a - b)
                newInst = builder.CreateFSub(arg0, arg1);
                llvm::Type* types[] = { GetBasicType(newInst), newInst->getType() };
                Function* length = Intrinsic::getDeclaration(module, Intrinsic::gla_fLength, types);
                newInst = builder.CreateCall(length, newInst);

                // Make next iteration revisit this decomposition, in case length is
                // decomposed.
                instI = inst;
                ++instI;
            }
            break;
        case Intrinsic::gla_fDot2:
            if (backEnd->decomposeIntrinsic(EDiDot)) {
                // component-wise multiply, then horizontal add of 2 lanes
                newInst = builder.CreateFMul(arg0, arg1);
                llvm::Value* element0 = builder.CreateExtractElement(newInst, MakeUnsignedConstant(module->getContext(), 0));
                llvm::Value* element1 = builder.CreateExtractElement(newInst, MakeUnsignedConstant(module->getContext(), 1));
                newInst = builder.CreateFAdd(element0, element1);
            }
            break;
        case Intrinsic::gla_fDot3:
            if (backEnd->decomposeIntrinsic(EDiDot)) {
                // component-wise multiply, then horizontal add of 3 lanes
                newInst = builder.CreateFMul(arg0, arg1);
                arg0 = newInst;
                llvm::Value* element0 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 0));
                llvm::Value* element1 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 1));
                newInst = builder.CreateFAdd(element0, element1);
                llvm::Value* element = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 2));
                newInst = builder.CreateFAdd(newInst, element);
            }
            break;
        case Intrinsic::gla_fDot4:
            if (backEnd->decomposeIntrinsic(EDiDot)) {
                // component-wise multiply, then horizontal add of 4 lanes
                newInst = builder.CreateFMul(arg0, arg1);
                arg0 = newInst;
                llvm::Value* element0 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 0));
                llvm::Value* element1 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 1));
                newInst = builder.CreateFAdd(element0, element1);
                for (int el = 2; el < 4; ++el) {
                    llvm::Value* element = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), el));
                    newInst = builder.CreateFAdd(newInst, element);
                }
            }
            break;
        case Intrinsic::gla_fCross:
            if (backEnd->decomposeIntrinsic(EDiCross)) {
                // (a1, a2, a3) X (b1, b2, b3) -> (a2*b3 - a3*b2, a3*b1 - a1*b3, a1*b2 - a2*b1)
                llvm::Value* a1 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 0));
                llvm::Value* a2 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 1));
                llvm::Value* a3 = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 2));
                llvm::Value* b1 = builder.CreateExtractElement(arg1, MakeUnsignedConstant(module->getContext(), 0));
                llvm::Value* b2 = builder.CreateExtractElement(arg1, MakeUnsignedConstant(module->getContext(), 1));
                llvm::Value* b3 = builder.CreateExtractElement(arg1, MakeUnsignedConstant(module->getContext(), 2));

                llvm::Value* empty = llvm::UndefValue::get(arg0->getType());

                // NOTE(review): 'scalarized' is a hard-coded compile-time
                // switch between the two equivalent emission strategies;
                // only the vector path is ever taken.
                bool scalarized = false;

                if (scalarized) {
                    // do it all with scalars

                    // a2*b3 - a3*b2
                    llvm::Value* p1 = builder.CreateFMul(a2, b3);
                    llvm::Value* p2 = builder.CreateFMul(a3, b2);
                    llvm::Value* element = builder.CreateFSub(p1, p2);
                    newInst = builder.CreateInsertElement(empty, element, MakeUnsignedConstant(module->getContext(), 0));

                    // a3*b1 - a1*b3
                    p1 = builder.CreateFMul(a3, b1);
                    p2 = builder.CreateFMul(a1, b3);
                    element = builder.CreateFSub(p1, p2);
                    newInst = builder.CreateInsertElement(newInst, element, MakeUnsignedConstant(module->getContext(), 1));

                    // a1*b2 - a2*b1
                    p1 = builder.CreateFMul(a1, b2);
                    p2 = builder.CreateFMul(a2, b1);
                    element = builder.CreateFSub(p1, p2);
                    newInst = builder.CreateInsertElement(newInst, element, MakeUnsignedConstant(module->getContext(), 2));
                } else {
                    // do it all with vectors

                    // (a2, a3, a1)
                    llvm::Value* aPerm;
                    aPerm = builder.CreateInsertElement(empty, a2, MakeUnsignedConstant(module->getContext(), 0));
                    aPerm = builder.CreateInsertElement(aPerm, a3, MakeUnsignedConstant(module->getContext(), 1));
                    aPerm = builder.CreateInsertElement(aPerm, a1, MakeUnsignedConstant(module->getContext(), 2));

                    // (b3, b1, b2)
                    llvm::Value* bPerm;
                    bPerm = builder.CreateInsertElement(empty, b3, MakeUnsignedConstant(module->getContext(), 0));
                    bPerm = builder.CreateInsertElement(bPerm, b1, MakeUnsignedConstant(module->getContext(), 1));
                    bPerm = builder.CreateInsertElement(bPerm, b2, MakeUnsignedConstant(module->getContext(), 2));

                    // first term computation
                    llvm::Value* firstTerm = builder.CreateFMul(aPerm, bPerm);

                    // (a3, a1, a2)
                    aPerm = builder.CreateInsertElement(empty, a3, MakeUnsignedConstant(module->getContext(), 0));
                    aPerm = builder.CreateInsertElement(aPerm, a1, MakeUnsignedConstant(module->getContext(), 1));
                    aPerm = builder.CreateInsertElement(aPerm, a2, MakeUnsignedConstant(module->getContext(), 2));

                    // (b2, b3, b1)
                    bPerm = builder.CreateInsertElement(empty, b2, MakeUnsignedConstant(module->getContext(), 0));
                    bPerm = builder.CreateInsertElement(bPerm, b3, MakeUnsignedConstant(module->getContext(), 1));
                    bPerm = builder.CreateInsertElement(bPerm, b1, MakeUnsignedConstant(module->getContext(), 2));

                    // second term computation
                    newInst = builder.CreateFMul(aPerm, bPerm);

                    // Finish it off
                    newInst = builder.CreateFSub(firstTerm, newInst);
                }
            }
            break;
        case Intrinsic::gla_fNormalize:
            if (backEnd->decomposeIntrinsic(EDiNormalize)) {
                // normalize(v) = v * inverseSqrt(dot(v,v)); scalar case is 1.0.
                if (GetComponentCount(arg0) > 1) {
                    Function* dot = GetDotIntrinsic(module, argTypes);
                    newInst = builder.CreateCall2(dot, arg0, arg0);

                    llvm::Type* type[] = { newInst->getType(), newInst->getType() };
                    Function* inverseSqrt = Intrinsic::getDeclaration(module, Intrinsic::gla_fInverseSqrt, type);
                    newInst = builder.CreateCall(inverseSqrt, newInst);

                    // smear it
                    llvm::Value* smeared = llvm::UndefValue::get(arg0->getType());
                    for (int c = 0; c < GetComponentCount(arg0); ++c)
                        smeared = builder.CreateInsertElement(smeared, newInst, MakeIntConstant(module->getContext(), c));

                    newInst = builder.CreateFMul(arg0, smeared);
                } else {
                    newInst = MakeFloatConstant(module->getContext(), 1.0);
                }

                // Make next iteration revisit this decomposition, in case dot or inverse-sqrt
                // are decomposed.
                instI = inst;
                ++instI;
            }
            break;
        case Intrinsic::gla_fNormalize3D:
            if (backEnd->decomposeIntrinsic(EDiNormalize3D)) {
                // Note: This does a 3D normalize on a vec3 or vec4.  The width of arg0 does
                // not determine that width of the dot-product input, the "3" in the "3D" does.
                llvm::Type* types[] = { GetBasicType(argTypes[0]), argTypes[0], argTypes[1] };
                Function* dot = Intrinsic::getDeclaration(module, Intrinsic::gla_fDot3, types);
                newInst = builder.CreateCall2(dot, arg0, arg0);

                llvm::Type* type[] = { newInst->getType(), newInst->getType() };
                Function* inverseSqrt = Intrinsic::getDeclaration(module, Intrinsic::gla_fInverseSqrt, type);
                newInst = builder.CreateCall(inverseSqrt, newInst);

                // smear it
                llvm::Value* smeared = llvm::UndefValue::get(arg0->getType());
                for (int c = 0; c < GetComponentCount(arg0); ++c)
                    smeared = builder.CreateInsertElement(smeared, newInst, MakeIntConstant(module->getContext(), c));

                // If we're 4-wide, copy over the original w component
                // NOTE(review): inserting 'arg0' (the whole vector) at index 4
                // of a 4-wide vector looks suspect -- a w-copy would extract
                // element 3 of arg0 and insert it at index 3; confirm intent.
                if (GetComponentCount(arg0) == 4)
                    smeared = builder.CreateInsertElement(smeared, arg0, MakeIntConstant(module->getContext(), 4));

                newInst = builder.CreateFMul(arg0, smeared);

                // Make next iteration revisit this decomposition, in case dot or inverse-sqrt
                // are decomposed.
                instI = inst;
                ++instI;
            }
            break;
        case Intrinsic::gla_fLit:
            if (backEnd->decomposeIntrinsic(EDiLit)) {
                UnsupportedFunctionality("decomposition of gla_fLit");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fFaceForward:
            if (backEnd->decomposeIntrinsic(EDiFaceForward)) {
                //
                // faceForward(N, I, Nref) is defined to be N if dot(Nref, I) < 0, otherwise return -N.
                //
                UnsupportedFunctionality("decomposition of gla_fFaceForward");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fReflect:
            if (backEnd->decomposeIntrinsic(EDiReflect)) {
                //
                // reflect(I, N) is defined to be I - 2 * dot(N, I) * N,
                // where N may be assumed to be normalized.
                //
                // Note if the number of components is 1, then N == 1 and
                // this turns into I - 2*I, or -I.
                //
                if (GetComponentCount(arg0) > 1) {
                    Function* dot = GetDotIntrinsic(module, argTypes);
                    newInst = builder.CreateCall2(dot, arg0, arg1);
                    newInst = MultiplyByConstant(builder, newInst, 2.0);

                    // smear this back up to a vector again
                    llvm::Value* smeared = llvm::UndefValue::get(arg0->getType());
                    for (int c = 0; c < GetComponentCount(arg0); ++c)
                        smeared = builder.CreateInsertElement(smeared, newInst, MakeIntConstant(module->getContext(), c));

                    newInst = builder.CreateFMul(smeared, arg1);
                    newInst = builder.CreateFSub(arg0, newInst);
                } else {
                    newInst = builder.CreateFNeg(arg0);
                }

                // Make next iteration revisit this decomposition, in case dot
                // is decomposed
                instI = inst;
                ++instI;
            }
            break;
        case Intrinsic::gla_fRefract:
            if (backEnd->decomposeIntrinsic(EDiRefract)) {
                UnsupportedFunctionality("decomposition of gla_fRefract");
                //changed = true;
            }
            break;
        case Intrinsic::gla_fFilterWidth:
            if (backEnd->decomposeIntrinsic(EDiFilterWidth)) {
                // filterWidth = abs(dFdx(p)) + abs(dFdy(p))
                Function* dFdx = Intrinsic::getDeclaration(module, Intrinsic::gla_fDFdx, makeArrayRef(argTypes, 2));
                Function* dFdy = Intrinsic::getDeclaration(module, Intrinsic::gla_fDFdy, makeArrayRef(argTypes, 2));
                Function* abs = Intrinsic::getDeclaration(module, Intrinsic::gla_fAbs, makeArrayRef(instTypes, 2));
                llvm::Value* dx = builder.CreateCall(dFdx, arg0);
                llvm::Value* dy = builder.CreateCall(dFdy, arg0);
                dx = builder.CreateCall(abs, dx);
                dy = builder.CreateCall(abs, dy);
                newInst = builder.CreateFAdd(dx, dy);
            }
            break;
        case Intrinsic::gla_fFixedTransform:
            if (backEnd->decomposeIntrinsic(EDiFixedTransform)) {
                UnsupportedFunctionality("decomposition of gla_fFixedTransform");
                //changed = true;
            }
            break;
        case Intrinsic::gla_any:
            if (backEnd->decomposeIntrinsic(EDiAny)) {
                // horizontal OR of all components
                if (GetComponentCount(arg0) == 1)
                    UnsupportedFunctionality("any() on a scalar");
                newInst = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 0));
                for (int c = 1; c < GetComponentCount(arg0); ++c) {
                    llvm::Value* comp = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), c));
                    newInst = builder.CreateOr(newInst, comp);
                }
            }
            break;
        case Intrinsic::gla_all:
            if (backEnd->decomposeIntrinsic(EDiAll)) {
                // horizontal AND of all components
                if (GetComponentCount(arg0) == 1)
                    UnsupportedFunctionality("all() on a scalar");
                newInst = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), 0));
                for (int c = 1; c < GetComponentCount(arg0); ++c) {
                    llvm::Value* comp = builder.CreateExtractElement(arg0, MakeUnsignedConstant(module->getContext(), c));
                    newInst = builder.CreateAnd(newInst, comp);
                }
            }
            break;
        case Intrinsic::gla_not:
            if (backEnd->decomposeIntrinsic(EDiNot)) {
                if (GetComponentCount(arg0) == 1)
                    UnsupportedFunctionality("not() on a scalar");
                newInst = builder.CreateNot(arg0);
            }
            break;
        case Intrinsic::gla_fTextureSample:
        case Intrinsic::gla_fTextureSampleLodRefZ:
        case Intrinsic::gla_fTextureSampleLodRefZOffset:
        case Intrinsic::gla_fTextureSampleLodRefZOffsetGrad:
            if (backEnd->decomposeIntrinsic(EDiTextureProjection)) {

                // if projection flag is set, divide all coordinates (and refZ) by projection
                int texFlags = GetConstantInt(intrinsic->getArgOperand(GetTextureOpIndex(ETOFlag)));
                if (texFlags & ETFProjected) {

                    // This case MODIFIES the intrinsic call in place (newInst
                    // stays null), so insert the division code before it
                    // rather than after.
                    // insert before intrinsic since we are not replacing it
                    builder.SetInsertPoint(inst);

                    // turn off projected flag to reflect decomposition
                    texFlags &= ~ETFProjected;

                    llvm::Value* coords = intrinsic->getArgOperand(GetTextureOpIndex(ETOCoord));

                    // determine how many channels are live after decomposition
                    int newCoordWidth = 0;
                    switch (GetConstantInt(intrinsic->getArgOperand(gla::ETOSamplerType))) {
                    case gla::ESamplerBuffer:
                    case gla::ESampler1D:
                        newCoordWidth = 1;
                        break;
                    case gla::ESampler2D:
                    case gla::ESampler2DRect:
                    case gla::ESampler2DMS:
                        newCoordWidth = 2;
                        break;
                    case gla::ESampler3D:
                        newCoordWidth = 3;
                        break;
                    case gla::ESamplerCube:
                        gla::UnsupportedFunctionality("projection with cube sampler");
                        break;
                    default:
                        assert(0 && "Unknown sampler type");
                        break;
                    }

                    if (texFlags & gla::ETFArrayed)
                        gla::UnsupportedFunctionality("projection with arrayed sampler");

                    // projection resides in last component
                    llvm::Value* projIdx = MakeUnsignedConstant(module->getContext(), GetComponentCount(coords) - 1);
                    llvm::Value* divisor = builder.CreateExtractElement(coords, projIdx);

                    llvm::Type* newCoordType;
                    if (newCoordWidth > 1)
                        newCoordType = llvm::VectorType::get(GetBasicType(coords), newCoordWidth);
                    else
                        newCoordType = GetBasicType(coords);

                    // create space to hold results
                    llvm::Value* newCoords = llvm::UndefValue::get(newCoordType);
                    llvm::Value* smearedProj = llvm::UndefValue::get(newCoordType);

                    if (newCoordWidth > 1) {
                        for (int i = 0; i < newCoordWidth; ++i) {
                            llvm::Value* idx = MakeUnsignedConstant(module->getContext(), i);

                            // smear projection
                            smearedProj = builder.CreateInsertElement(smearedProj, divisor, idx);

                            // shrink coordinates to remove projection component
                            llvm::Value* oldCoord = builder.CreateExtractElement(coords, idx);
                            newCoords = builder.CreateInsertElement(newCoords, oldCoord, idx);
                        }
                    } else {
                        smearedProj = divisor;
                        newCoords = builder.CreateExtractElement(coords, MakeUnsignedConstant(module->getContext(), 0));
                    }

                    // divide coordinates
                    newCoords = builder.CreateFDiv(newCoords, smearedProj);

                    //
                    // Remaining code declares new intrinsic and modifies call arguments
                    //

                    // build up argTypes for flexible parameters, including result
                    llvm::SmallVector<llvm::Type*, 5> types;

                    // result type
                    types.push_back(intrinsic->getType());

                    // use new coords to reflect shrink
                    types.push_back(newCoords->getType());

                    // add offset (deliberate case fall-through: both Offset
                    // variants carry an offset operand)
                    switch (intrinsic->getIntrinsicID()) {
                    case Intrinsic::gla_fTextureSampleLodRefZOffset:
                    case Intrinsic::gla_fTextureSampleLodRefZOffsetGrad:
                        types.push_back(intrinsic->getArgOperand(ETOOffset)->getType());
                    default:
                        break;
                    }

                    // add gradients
                    switch (intrinsic->getIntrinsicID()) {
                    case Intrinsic::gla_fTextureSampleLodRefZOffsetGrad:
                        types.push_back(intrinsic->getArgOperand(ETODPdx)->getType());
                        types.push_back(intrinsic->getArgOperand(ETODPdy)->getType());
                    default:
                        break;
                    }

                    // declare the new intrinsic
                    // TODO: functionality: texturing correctness: is this getting the correct non-projective form?
                    Function* texture = Intrinsic::getDeclaration(module, intrinsic->getIntrinsicID(), types);

                    // modify arguments to match new intrinsic
                    intrinsic->setCalledFunction(texture);
                    intrinsic->setArgOperand(ETOFlag, MakeUnsignedConstant(module->getContext(), texFlags));
                    intrinsic->setArgOperand(ETOCoord, newCoords);

                    // RefZ variants also divide the depth-compare reference
                    switch (intrinsic->getIntrinsicID()) {
                    case Intrinsic::gla_fTextureSampleLodRefZ:
                    case Intrinsic::gla_fTextureSampleLodRefZOffset:
                    case Intrinsic::gla_fTextureSampleLodRefZOffsetGrad:
                        intrinsic->setArgOperand(ETORefZ, builder.CreateFDiv(intrinsic->getArgOperand(ETORefZ), divisor));
                    default:
                        break;
                    }

                    // mark our change, but don't replace the intrinsic
                    changed = true;
                }
            }
            break;
        default:
            // The cases above needs to be comprehensive in terms of checking
            // for what intrinsics to decompose.  If not there the assumption is
            // it never needs to be decomposed.
            break;
        }

        // If a case produced a replacement value, swap it in for the original
        // intrinsic call and delete the call.
        if (newInst) {
            inst->replaceAllUsesWith(newInst);
            inst->dropAllReferences();
            inst->eraseFromParent();
            changed = true;
        }
    }
}
/// Returns true if the beginning of this instruction can be safely shortened /// in length. static bool isShortenableAtTheBeginning(Instruction *I) { // FIXME: Handle only memset for now. Supporting memcpy/memmove should be // easily done by offsetting the source address. IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); return II && II->getIntrinsicID() == Intrinsic::memset; }
/// getModRefInfo - Check to see if the specified callsite can clobber the
/// specified memory object.  Since we only look at local properties of this
/// function, we really can't say much about this query.  We do, however, use
/// simple "address taken" analysis on local objects.
AliasAnalysis::ModRefResult
BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
  // Strip casts/GEPs to find the allocation or argument P is based on.
  const Value *Object = P->getUnderlyingObject();

  // If this is a tail call and P points to a stack location, we know that
  // the tail call cannot access or modify the local stack.
  // We cannot exclude byval arguments here; these belong to the caller of
  // the current function not to the current function, and a tail callee
  // may reference them.
  if (isa<AllocaInst>(Object))
    if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
      if (CI->isTailCall())
        return NoModRef;

  // If the pointer is to a locally allocated object that does not escape,
  // then the call can not mod/ref the pointer unless the call takes the pointer
  // as an argument, and itself doesn't capture it.
  if (!isa<Constant>(Object) && CS.getInstruction() != Object &&
      isNonEscapingLocalObject(Object)) {
    bool PassedAsArg = false;
    unsigned ArgNo = 0;
    for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
         CI != CE; ++CI, ++ArgNo) {
      // Only look at the no-capture pointer arguments.  Attribute indices are
      // 1-based, hence ArgNo+1.
      if (!isa<PointerType>((*CI)->getType()) ||
          !CS.paramHasAttr(ArgNo+1, Attribute::NoCapture))
        continue;

      // If this is a no-capture pointer argument, see if we can tell that it
      // is impossible to alias the pointer we're checking.  If not, we have to
      // assume that the call could touch the pointer, even though it doesn't
      // escape.  (~0U = unknown size, i.e. a conservative alias query.)
      if (!isNoAlias(cast<Value>(CI), ~0U, P, ~0U)) {
        PassedAsArg = true;
        break;
      }
    }

    if (!PassedAsArg)
      return NoModRef;
  }

  // Finally, handle specific knowledge of intrinsics.
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
  if (II == 0)
    // Not an intrinsic: defer to the generic mod/ref analysis.
    return AliasAnalysis::getModRefInfo(CS, P, Size);

  switch (II->getIntrinsicID()) {
  default: break;
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    // Length operand may be non-constant; fall back to unknown size (~0U).
    unsigned Len = ~0U;
    if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getOperand(3)))
      Len = LenCI->getZExtValue();
    Value *Dest = II->getOperand(1);
    Value *Src = II->getOperand(2);
    if (isNoAlias(Dest, Len, P, Size)) {
      // Destination can't touch P.  If the source can't either, the call is
      // entirely disjoint from P; otherwise it can only read it.
      if (isNoAlias(Src, Len, P, Size))
        return NoModRef;
      return Ref;
    }
    break;
  }
  case Intrinsic::memset:
    // Since memset is 'accesses arguments' only, the AliasAnalysis base class
    // will handle it for the variable length case.
    if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getOperand(3))) {
      unsigned Len = LenCI->getZExtValue();
      Value *Dest = II->getOperand(1);
      if (isNoAlias(Dest, Len, P, Size))
        return NoModRef;
    }
    break;
  case Intrinsic::atomic_cmp_swap:
  case Intrinsic::atomic_swap:
  case Intrinsic::atomic_load_add:
  case Intrinsic::atomic_load_sub:
  case Intrinsic::atomic_load_and:
  case Intrinsic::atomic_load_nand:
  case Intrinsic::atomic_load_or:
  case Intrinsic::atomic_load_xor:
  case Intrinsic::atomic_load_max:
  case Intrinsic::atomic_load_min:
  case Intrinsic::atomic_load_umax:
  case Intrinsic::atomic_load_umin:
    // Atomic RMW intrinsics only touch their pointer operand; the access size
    // comes from the pointee type, which needs TargetData to compute.
    if (TD) {
      Value *Op1 = II->getOperand(1);
      unsigned Op1Size = TD->getTypeStoreSize(Op1->getType());
      if (isNoAlias(Op1, Op1Size, P, Size))
        return NoModRef;
    }
    break;
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
  case Intrinsic::invariant_start: {
    // Operands: (size, ptr).
    unsigned PtrSize = cast<ConstantInt>(II->getOperand(1))->getZExtValue();
    if (isNoAlias(II->getOperand(2), PtrSize, P, Size))
      return NoModRef;
    break;
  }
  case Intrinsic::invariant_end: {
    // Operands: (token, size, ptr) — size/ptr are shifted by one.
    unsigned PtrSize = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
    if (isNoAlias(II->getOperand(3), PtrSize, P, Size))
      return NoModRef;
    break;
  }
  }

  // The AliasAnalysis base class has some smarts, lets use them.
  return AliasAnalysis::getModRefInfo(CS, P, Size);
}
// Lower or remove intrinsics in basic block 'b' that the executor cannot
// handle directly, replacing them with equivalent ordinary instructions.
// Returns true if the block was modified.
bool IntrinsicCleanerPass::runOnBasicBlock(BasicBlock &b, Module &M) {
  bool dirty = false;
  bool block_split=false;

#if LLVM_VERSION_CODE <= LLVM_VERSION(3, 1)
  unsigned WordSize = TargetData.getPointerSizeInBits() / 8;
#else
  unsigned WordSize = DataLayout.getPointerSizeInBits() / 8;
#endif
  for (BasicBlock::iterator i = b.begin(), ie = b.end();
       (i != ie) && (block_split == false);) {
    IntrinsicInst *ii = dyn_cast<IntrinsicInst>(&*i);
    // increment now since LowerIntrinsic deletion makes iterator invalid.
    ++i;
    if(ii) {
      switch (ii->getIntrinsicID()) {
      case Intrinsic::vastart:
      case Intrinsic::vaend:
        // Left alone: the executor models these directly.
        break;

        // Lower vacopy so that object resolution etc is handled by
        // normal instructions.
        //
        // FIXME: This is much more target dependent than just the word size,
        // however this works for x86-32 and x86-64.
      case Intrinsic::vacopy: { // (dst, src) -> *((i8**) dst) = *((i8**) src)
        Value *dst = ii->getArgOperand(0);
        Value *src = ii->getArgOperand(1);

        if (WordSize == 4) {
          // 32-bit va_list is a single pointer: copy one i8* slot.
          Type *i8pp = PointerType::getUnqual(PointerType::getUnqual(Type::getInt8Ty(getGlobalContext())));
          Value *castedDst = CastInst::CreatePointerCast(dst, i8pp, "vacopy.cast.dst", ii);
          Value *castedSrc = CastInst::CreatePointerCast(src, i8pp, "vacopy.cast.src", ii);
          Value *load = new LoadInst(castedSrc, "vacopy.read", ii);
          new StoreInst(load, castedDst, false, ii);
        } else {
          // 64-bit (x86-64) va_list is a 24-byte struct: copy three i64 words.
          assert(WordSize == 8 && "Invalid word size!");
          Type *i64p = PointerType::getUnqual(Type::getInt64Ty(getGlobalContext()));
          Value *pDst = CastInst::CreatePointerCast(dst, i64p, "vacopy.cast.dst", ii);
          Value *pSrc = CastInst::CreatePointerCast(src, i64p, "vacopy.cast.src", ii);

          // Word 0.
          Value *val = new LoadInst(pSrc, std::string(), ii);
          new StoreInst(val, pDst, ii);

          // Word 1.
          Value *off = ConstantInt::get(Type::getInt64Ty(getGlobalContext()), 1);
          pDst = GetElementPtrInst::Create(pDst, off, std::string(), ii);
          pSrc = GetElementPtrInst::Create(pSrc, off, std::string(), ii);
          val = new LoadInst(pSrc, std::string(), ii);
          new StoreInst(val, pDst, ii);

          // Word 2.
          pDst = GetElementPtrInst::Create(pDst, off, std::string(), ii);
          pSrc = GetElementPtrInst::Create(pSrc, off, std::string(), ii);
          val = new LoadInst(pSrc, std::string(), ii);
          new StoreInst(val, pDst, ii);
        }
        ii->removeFromParent();
        delete ii;
        break;
      }

      case Intrinsic::sadd_with_overflow:
      case Intrinsic::ssub_with_overflow:
      case Intrinsic::smul_with_overflow:
      case Intrinsic::uadd_with_overflow:
      case Intrinsic::usub_with_overflow:
      case Intrinsic::umul_with_overflow: {
        // Lower {result, overflow-bit} arithmetic intrinsics by performing the
        // operation at double bit-width and comparing against the type's
        // min/max to detect overflow.
        IRBuilder<> builder(ii->getParent(), ii);

        Value *op1 = ii->getArgOperand(0);
        Value *op2 = ii->getArgOperand(1);

        Value *result = 0;
        Value *result_ext = 0;
        Value *overflow = 0;

        unsigned int bw = op1->getType()->getPrimitiveSizeInBits();
        unsigned int bw2 = op1->getType()->getPrimitiveSizeInBits()*2;

        if ((ii->getIntrinsicID() == Intrinsic::uadd_with_overflow) ||
            (ii->getIntrinsicID() == Intrinsic::usub_with_overflow) ||
            (ii->getIntrinsicID() == Intrinsic::umul_with_overflow)) {
          // Unsigned: zero-extend, compute, overflow iff result > UINT_MAX(bw).
          Value *op1ext =
            builder.CreateZExt(op1, IntegerType::get(M.getContext(), bw2));
          Value *op2ext =
            builder.CreateZExt(op2, IntegerType::get(M.getContext(), bw2));
          Value *int_max_s =
            ConstantInt::get(op1->getType(), APInt::getMaxValue(bw));
          Value *int_max =
            builder.CreateZExt(int_max_s, IntegerType::get(M.getContext(), bw2));

          if (ii->getIntrinsicID() == Intrinsic::uadd_with_overflow){
            result_ext = builder.CreateAdd(op1ext, op2ext);
          } else if (ii->getIntrinsicID() == Intrinsic::usub_with_overflow){
            result_ext = builder.CreateSub(op1ext, op2ext);
          } else if (ii->getIntrinsicID() == Intrinsic::umul_with_overflow){
            result_ext = builder.CreateMul(op1ext, op2ext);
          }
          overflow = builder.CreateICmpUGT(result_ext, int_max);

        } else if ((ii->getIntrinsicID() == Intrinsic::sadd_with_overflow) ||
                   (ii->getIntrinsicID() == Intrinsic::ssub_with_overflow) ||
                   (ii->getIntrinsicID() == Intrinsic::smul_with_overflow)) {
          // Signed: sign-extend, compute, overflow iff the result leaves
          // [INT_MIN(bw), INT_MAX(bw)].
          Value *op1ext =
            builder.CreateSExt(op1, IntegerType::get(M.getContext(), bw2));
          Value *op2ext =
            builder.CreateSExt(op2, IntegerType::get(M.getContext(), bw2));
          Value *int_max_s =
            ConstantInt::get(op1->getType(), APInt::getSignedMaxValue(bw));
          Value *int_min_s =
            ConstantInt::get(op1->getType(), APInt::getSignedMinValue(bw));
          Value *int_max =
            builder.CreateSExt(int_max_s, IntegerType::get(M.getContext(), bw2));
          Value *int_min =
            builder.CreateSExt(int_min_s, IntegerType::get(M.getContext(), bw2));

          if (ii->getIntrinsicID() == Intrinsic::sadd_with_overflow){
            result_ext = builder.CreateAdd(op1ext, op2ext);
          } else if (ii->getIntrinsicID() == Intrinsic::ssub_with_overflow){
            result_ext = builder.CreateSub(op1ext, op2ext);
          } else if (ii->getIntrinsicID() == Intrinsic::smul_with_overflow){
            result_ext = builder.CreateMul(op1ext, op2ext);
          }
          overflow = builder.CreateOr(builder.CreateICmpSGT(result_ext, int_max),
                                      builder.CreateICmpSLT(result_ext, int_min));
        }

        // This trunc could be replaced by a more general trunc replacement
        // that allows to detect also undefined behavior in assignments or
        // overflow in operation with integers whose dimension is smaller than
        // int's dimension, e.g.
        //     uint8_t = uint8_t + uint8_t;
        // if one desires the wrapping should write
        //     uint8_t = (uint8_t + uint8_t) & 0xFF;
        // before this, must check if it has side effects on other operations
        result = builder.CreateTrunc(result_ext, op1->getType());
        // Rebuild the {result, overflow} struct the intrinsic returned.
        Value *resultStruct =
          builder.CreateInsertValue(UndefValue::get(ii->getType()), result, 0);
        resultStruct = builder.CreateInsertValue(resultStruct, overflow, 1);

        ii->replaceAllUsesWith(resultStruct);
        ii->removeFromParent();
        delete ii;
        dirty = true;
        break;
      }

      case Intrinsic::dbg_value:
      case Intrinsic::dbg_declare:
        // Remove these regardless of lower intrinsics flag. This can
        // be removed once IntrinsicLowering is fixed to not have bad
        // caches.
        ii->eraseFromParent();
        dirty = true;
        break;

      case Intrinsic::trap: {
        // Intrisic instruction "llvm.trap" found. Directly lower it to
        // a call of the abort() function.
        Function *F = cast<Function>(
          M.getOrInsertFunction(
            "abort", Type::getVoidTy(getGlobalContext()), NULL));
        F->setDoesNotReturn();
        F->setDoesNotThrow();

        CallInst::Create(F, Twine(), ii);
        // abort() never returns, so terminate the block after the call.
        new UnreachableInst(getGlobalContext(), ii);

        ii->eraseFromParent();

        dirty = true;
        break;
      }
      case Intrinsic::objectsize: {
        // We don't know the size of an object in general so we replace
        // with 0 or -1 depending on the second argument to the intrinsic.
        assert(ii->getNumArgOperands() == 2 && "wrong number of arguments");
        Value *minArg = ii->getArgOperand(1);
        assert(minArg && "Failed to get second argument");
        ConstantInt *minArgAsInt = dyn_cast<ConstantInt>(minArg);
        assert(minArgAsInt && "Second arg is not a ConstantInt");
        assert(minArgAsInt->getBitWidth() == 1 && "Second argument is not an i1");
        Value *replacement = NULL;
        LLVM_TYPE_Q IntegerType *intType = dyn_cast<IntegerType>(ii->getType());
        assert(intType && "intrinsic does not have integer return type");
        if (minArgAsInt->isZero()) {
          // min=false: report "unknown/maximum" size.
          replacement = ConstantInt::get(intType, -1, /*isSigned=*/true);
        } else {
          // min=true: report the minimum possible size.
          replacement = ConstantInt::get(intType, 0, /*isSigned=*/false);
        }

        ii->replaceAllUsesWith(replacement);
        ii->eraseFromParent();
        dirty = true;
        break;
      }

      default:
        // Everything else goes through the generic IntrinsicLowering, when
        // enabled.
        if (LowerIntrinsics)
          IL->LowerIntrinsicCall(ii);
        dirty = true;
        break;
      }
    }
  }

  return dirty;
}
// Scan allocas declared OUTSIDE the extraction region and decide which ones
// (and which of their lifetime markers) can be sunk into, or hoisted out of,
// the outlined region.  Sink candidates go to SinkCands, hoist candidates to
// HoistCands; ExitBlock is set to the region's common exit block (hoist
// target), if any.
void CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands,
                                BasicBlock *&ExitBlock) const {
  Function *Func = (*Blocks.begin())->getParent();
  ExitBlock = getCommonExitBlock(Blocks);

  for (BasicBlock &BB : *Func) {
    // Only consider allocas living outside the region being extracted.
    if (Blocks.count(&BB))
      continue;
    for (Instruction &II : BB) {
      auto *AI = dyn_cast<AllocaInst>(&II);
      if (!AI)
        continue;

      // Find the pair of life time markers for address 'Addr' that are either
      // defined inside the outline region or can legally be shrinkwrapped into
      // the outline region. If there are not other untracked uses of the
      // address, return the pair of markers if found; otherwise return a pair
      // of nullptr.
      auto GetLifeTimeMarkers =
          [&](Instruction *Addr, bool &SinkLifeStart,
              bool &HoistLifeEnd) -> std::pair<Instruction *, Instruction *> {
        Instruction *LifeStart = nullptr, *LifeEnd = nullptr;

        for (User *U : Addr->users()) {
          IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U);
          if (IntrInst) {
            if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) {
              // Do not handle the case where AI has multiple start markers.
              if (LifeStart)
                return std::make_pair<Instruction *>(nullptr, nullptr);
              LifeStart = IntrInst;
            }
            if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) {
              // Likewise bail on multiple end markers.
              if (LifeEnd)
                return std::make_pair<Instruction *>(nullptr, nullptr);
              LifeEnd = IntrInst;
            }
            continue;
          }
          // Find untracked uses of the address, bail.
          if (!definedInRegion(Blocks, U))
            return std::make_pair<Instruction *>(nullptr, nullptr);
        }

        if (!LifeStart || !LifeEnd)
          return std::make_pair<Instruction *>(nullptr, nullptr);

        // A marker outside the region must be moved into it (sink the start,
        // hoist the end) for the alloca to be sinkable.
        SinkLifeStart = !definedInRegion(Blocks, LifeStart);
        HoistLifeEnd = !definedInRegion(Blocks, LifeEnd);
        // Do legality Check.
        if ((SinkLifeStart || HoistLifeEnd) &&
            !isLegalToShrinkwrapLifetimeMarkers(Addr))
          return std::make_pair<Instruction *>(nullptr, nullptr);

        // Check to see if we have a place to do hoisting, if not, bail.
        if (HoistLifeEnd && !ExitBlock)
          return std::make_pair<Instruction *>(nullptr, nullptr);

        return std::make_pair(LifeStart, LifeEnd);
      };

      bool SinkLifeStart = false, HoistLifeEnd = false;
      // First try: lifetime markers applied directly to the alloca.
      auto Markers = GetLifeTimeMarkers(AI, SinkLifeStart, HoistLifeEnd);

      if (Markers.first) {
        if (SinkLifeStart)
          SinkCands.insert(Markers.first);
        SinkCands.insert(AI);
        if (HoistLifeEnd)
          HoistCands.insert(Markers.second);
        continue;
      }

      // Follow the bitcast.
      // Second try: markers may be attached to a cast/offset of the alloca.
      Instruction *MarkerAddr = nullptr;
      for (User *U : AI->users()) {
        if (U->stripInBoundsConstantOffsets() == AI) {
          // Reset flags so a failed probe on one cast doesn't leak into the
          // next.
          SinkLifeStart = false;
          HoistLifeEnd = false;
          Instruction *Bitcast = cast<Instruction>(U);
          Markers = GetLifeTimeMarkers(Bitcast, SinkLifeStart, HoistLifeEnd);
          if (Markers.first) {
            MarkerAddr = Bitcast;
            continue;
          }
        }

        // Found unknown use of AI.
        if (!definedInRegion(Blocks, U)) {
          MarkerAddr = nullptr;
          break;
        }
      }

      if (MarkerAddr) {
        if (SinkLifeStart)
          SinkCands.insert(Markers.first);
        // Sink the cast itself too if it lives outside the region.
        if (!definedInRegion(Blocks, MarkerAddr))
          SinkCands.insert(MarkerAddr);
        SinkCands.insert(AI);
        if (HoistLifeEnd)
          HoistCands.insert(Markers.second);
      }
    }
  }
}