/// Replace the bitreverse intrinsic call \p I with an equivalent sequence that
/// performs the bit reversal on the type returned by getI32Ty().
///
/// The operand is zero-extended, reversed with the widened bitreverse
/// intrinsic, shifted right by (32 - base element bit width) so the reversed
/// bits end up in the low part of the value, and finally truncated back to the
/// original type. All uses of \p I are redirected to the truncated value and
/// \p I is erased.
///
/// \returns true unconditionally (the IR is always changed).
bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
    IntrinsicInst &I) const {
  assert(I.getIntrinsicID() == Intrinsic::bitreverse &&
         "I must be bitreverse intrinsic");
  assert(needsPromotionToI32(I.getType()) &&
         "I does not need promotion to i32");

  // New instructions go right before the intrinsic and inherit its debug
  // location.
  IRBuilder<> Builder(&I);
  Builder.SetCurrentDebugLocation(I.getDebugLoc());

  Type *WideTy = getI32Ty(Builder, I.getType());
  Function *WideBitreverse =
      Intrinsic::getDeclaration(Mod, Intrinsic::bitreverse, { WideTy });

  Value *Widened = Builder.CreateZExt(I.getOperand(0), WideTy);
  Value *Reversed = Builder.CreateCall(WideBitreverse, { Widened });

  // After the wide reversal the interesting bits sit at the top of the value;
  // shift them back down before narrowing.
  Value *Shifted =
      Builder.CreateLShr(Reversed, 32 - getBaseElementBitWidth(I.getType()));
  Value *Narrowed = Builder.CreateTrunc(Shifted, I.getType());

  I.replaceAllUsesWith(Narrowed);
  I.eraseFromParent();
  return true;
}
/// Try to get the static, non-array alloca \p I out of its original storage.
///
/// Vector promotion (tryPromoteAllocaToVector) is attempted first. If that
/// fails, and enough of LocalMemAvailable remains, the alloca is replaced by an
/// element of a new internal global array in AMDGPUAS::LOCAL_ADDRESS. The
/// element index is built from getLocalSizeYZ() and getWorkitemID(). Every
/// value collected by collectUsesWithPtrTypes() is then retyped or rewritten
/// for the new address space.
void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
  // Array allocations are probably not worth handling, since an allocation of
  // the array type is the canonical form.
  if (!I.isStaticAlloca() || I.isArrayAllocation())
    return;

  IRBuilder<> Builder(&I);

  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  // First choice: replace the alloca with a vector.
  if (tryPromoteAllocaToVector(&I))
    return;

  DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");

  const Function &ContainingFunction = *I.getParent()->getParent();

  // FIXME: We should also try to get this value from the reqd_work_group_size
  // function attribute if it is available.
  unsigned WorkGroupSize = AMDGPU::getMaximumWorkGroupSize(ContainingFunction);

  // One copy of the allocated type is needed for each of WorkGroupSize slots.
  int AllocaSize =
      WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy);

  if (AllocaSize > LocalMemAvailable) {
    DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
    return;
  }

  std::vector<Value*> WorkList;

  if (!collectUsesWithPtrTypes(&I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");
  LocalMemAvailable -= AllocaSize;

  Function *ParentFn = I.getParent()->getParent();

  // Backing storage: [WorkGroupSize x AllocatedType] in the local address
  // space, named "<function>.<alloca>".
  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage,
      UndefValue::get(GVTy),
      Twine(ParentFn->getName()) + Twine('.') + I.getName(),
      nullptr,
      GlobalVariable::NotThreadLocal,
      AMDGPUAS::LOCAL_ADDRESS);
  GV->setUnnamedAddr(true);
  GV->setAlignment(I.getAlignment());

  // Element index = (SizeY * SizeZ) * IdX + IdY * SizeZ + IdZ.
  Value *SizeY, *SizeZ;
  std::tie(SizeY, SizeZ) = getLocalSizeYZ(Builder);
  Value *IdX = getWorkitemID(Builder, 0);
  Value *IdY = getWorkitemID(Builder, 1);
  Value *IdZ = getWorkitemID(Builder, 2);

  Value *XTerm = Builder.CreateMul(SizeY, SizeZ, "", true, true);
  XTerm = Builder.CreateMul(XTerm, IdX);
  Value *YTerm = Builder.CreateMul(IdY, SizeZ, "", true, true);
  Value *FlatIdx = Builder.CreateAdd(XTerm, YTerm);
  FlatIdx = Builder.CreateAdd(FlatIdx, IdZ);

  Value *Indices[] = {
    Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
    FlatIdx
  };

  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  // Fix up every collected user so that it works on the new address space.
  for (Value *V : WorkList) {
    CallInst *Call = dyn_cast<CallInst>(V);

    // --- Plain (non-call) users: just retype the produced pointer. ---
    if (!Call) {
      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);

      // The operand's value should be corrected on its own.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);
      continue;
    }

    IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);

    // --- Ordinary function calls: redirect to a "<name>.local" function. ---
    if (!Intr) {
      // FIXME: What is this for? It doesn't make sense to promote arbitrary
      // function calls. If the call is to a defined function that can also be
      // promoted, we should be able to do this once that function is also
      // rewritten.
      const unsigned NumArgs = Call->getNumArgOperands();
      std::vector<Type*> ArgTypes;
      for (unsigned ArgIdx = 0; ArgIdx != NumArgs; ++ArgIdx)
        ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());

      Function *Callee = Call->getCalledFunction();
      FunctionType *NewType =
          FunctionType::get(Call->getType(), ArgTypes, Callee->isVarArg());
      Constant *C = Mod->getOrInsertFunction(
          (Callee->getName() + ".local").str(), NewType,
          Callee->getAttributes());
      Function *NewF = cast<Function>(C);
      Call->setCalledFunction(NewF);
      continue;
    }

    // --- Intrinsics: re-create or drop, depending on the intrinsic. ---
    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only
      Intr->eraseFromParent();
      continue;

    case Intrinsic::memcpy: {
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
                           MemCpy->getLength(), MemCpy->getAlignment(),
                           MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }

    case Intrinsic::memmove: {
      MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
      Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getRawSource(),
                            MemMove->getLength(), MemMove->getAlignment(),
                            MemMove->isVolatile());
      Intr->eraseFromParent();
      continue;
    }

    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getAlignment(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }

    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::invariant_group_barrier:
      Intr->eraseFromParent();
      // FIXME: I think the invariant marker should still theoretically apply,
      // but the intrinsics need to be changed to accept pointers with any
      // address space.
      continue;

    case Intrinsic::objectsize: {
      // Re-declare the intrinsic for a local-address-space pointer operand.
      Value *Src = Intr->getOperand(0);
      Type *SrcTy = Src->getType()->getPointerElementType();
      Function *ObjectSize = Intrinsic::getDeclaration(Mod,
        Intrinsic::objectsize,
        { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) }
      );

      CallInst *NewCall
        = Builder.CreateCall(ObjectSize, { Src, Intr->getOperand(1) });
      Intr->replaceAllUsesWith(NewCall);
      Intr->eraseFromParent();
      continue;
    }

    default:
      Intr->dump();
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
}
// FIXME: Should try to pick the most likely to be profitable allocas first. bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) { // Array allocations are probably not worth handling, since an allocation of // the array type is the canonical form. if (!I.isStaticAlloca() || I.isArrayAllocation()) return false; IRBuilder<> Builder(&I); // First try to replace the alloca with a vector Type *AllocaTy = I.getAllocatedType(); DEBUG(dbgs() << "Trying to promote " << I << '\n'); if (tryPromoteAllocaToVector(&I, AS)) return true; // Promoted to vector. const Function &ContainingFunction = *I.getParent()->getParent(); CallingConv::ID CC = ContainingFunction.getCallingConv(); // Don't promote the alloca to LDS for shader calling conventions as the work // item ID intrinsics are not supported for these calling conventions. // Furthermore not all LDS is available for some of the stages. switch (CC) { case CallingConv::AMDGPU_KERNEL: case CallingConv::SPIR_KERNEL: break; default: DEBUG(dbgs() << " promote alloca to LDS not supported with calling convention.\n"); return false; } // Not likely to have sufficient local memory for promotion. if (!SufficientLDS) return false; const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(ContainingFunction); unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second; const DataLayout &DL = Mod->getDataLayout(); unsigned Align = I.getAlignment(); if (Align == 0) Align = DL.getABITypeAlignment(I.getAllocatedType()); // FIXME: This computed padding is likely wrong since it depends on inverse // usage order. // // FIXME: It is also possible that if we're allowed to use all of the memory // could could end up using more than the maximum due to alignment padding. 
uint32_t NewSize = alignTo(CurrentLocalMemUsage, Align); uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy); NewSize += AllocSize; if (NewSize > LocalMemLimit) { DEBUG(dbgs() << " " << AllocSize << " bytes of local memory not available to promote\n"); return false; } CurrentLocalMemUsage = NewSize; std::vector<Value*> WorkList; if (!collectUsesWithPtrTypes(&I, &I, WorkList)) { DEBUG(dbgs() << " Do not know how to convert all uses\n"); return false; } DEBUG(dbgs() << "Promoting alloca to local memory\n"); Function *F = I.getParent()->getParent(); Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize); GlobalVariable *GV = new GlobalVariable( *Mod, GVTy, false, GlobalValue::InternalLinkage, UndefValue::get(GVTy), Twine(F->getName()) + Twine('.') + I.getName(), nullptr, GlobalVariable::NotThreadLocal, AS.LOCAL_ADDRESS); GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); GV->setAlignment(I.getAlignment()); Value *TCntY, *TCntZ; std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder); Value *TIdX = getWorkitemID(Builder, 0); Value *TIdY = getWorkitemID(Builder, 1); Value *TIdZ = getWorkitemID(Builder, 2); Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true); Tmp0 = Builder.CreateMul(Tmp0, TIdX); Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true); Value *TID = Builder.CreateAdd(Tmp0, Tmp1); TID = Builder.CreateAdd(TID, TIdZ); Value *Indices[] = { Constant::getNullValue(Type::getInt32Ty(Mod->getContext())), TID }; Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices); I.mutateType(Offset->getType()); I.replaceAllUsesWith(Offset); I.eraseFromParent(); for (Value *V : WorkList) { CallInst *Call = dyn_cast<CallInst>(V); if (!Call) { if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) { Value *Src0 = CI->getOperand(0); Type *EltTy = Src0->getType()->getPointerElementType(); PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS); if (isa<ConstantPointerNull>(CI->getOperand(0))) CI->setOperand(0, ConstantPointerNull::get(NewTy)); if 
(isa<ConstantPointerNull>(CI->getOperand(1))) CI->setOperand(1, ConstantPointerNull::get(NewTy)); continue; } // The operand's value should be corrected on its own and we don't want to // touch the users. if (isa<AddrSpaceCastInst>(V)) continue; Type *EltTy = V->getType()->getPointerElementType(); PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS); // FIXME: It doesn't really make sense to try to do this for all // instructions. V->mutateType(NewTy); // Adjust the types of any constant operands. if (SelectInst *SI = dyn_cast<SelectInst>(V)) { if (isa<ConstantPointerNull>(SI->getOperand(1))) SI->setOperand(1, ConstantPointerNull::get(NewTy)); if (isa<ConstantPointerNull>(SI->getOperand(2))) SI->setOperand(2, ConstantPointerNull::get(NewTy)); } else if (PHINode *Phi = dyn_cast<PHINode>(V)) { for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) { if (isa<ConstantPointerNull>(Phi->getIncomingValue(I))) Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy)); } } continue; } IntrinsicInst *Intr = cast<IntrinsicInst>(Call); Builder.SetInsertPoint(Intr); switch (Intr->getIntrinsicID()) { case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: // These intrinsics are for address space 0 only Intr->eraseFromParent(); continue; case Intrinsic::memcpy: { MemCpyInst *MemCpy = cast<MemCpyInst>(Intr); Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getDestAlignment(), MemCpy->getRawSource(), MemCpy->getSourceAlignment(), MemCpy->getLength(), MemCpy->isVolatile()); Intr->eraseFromParent(); continue; } case Intrinsic::memmove: { MemMoveInst *MemMove = cast<MemMoveInst>(Intr); Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getDestAlignment(), MemMove->getRawSource(), MemMove->getSourceAlignment(), MemMove->getLength(), MemMove->isVolatile()); Intr->eraseFromParent(); continue; } case Intrinsic::memset: { MemSetInst *MemSet = cast<MemSetInst>(Intr); Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(), MemSet->getLength(), 
MemSet->getDestAlignment(), MemSet->isVolatile()); Intr->eraseFromParent(); continue; } case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::invariant_group_barrier: Intr->eraseFromParent(); // FIXME: I think the invariant marker should still theoretically apply, // but the intrinsics need to be changed to accept pointers with any // address space. continue; case Intrinsic::objectsize: { Value *Src = Intr->getOperand(0); Type *SrcTy = Src->getType()->getPointerElementType(); Function *ObjectSize = Intrinsic::getDeclaration(Mod, Intrinsic::objectsize, { Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) } ); CallInst *NewCall = Builder.CreateCall( ObjectSize, {Src, Intr->getOperand(1), Intr->getOperand(2)}); Intr->replaceAllUsesWith(NewCall); Intr->eraseFromParent(); continue; } default: Intr->print(errs()); llvm_unreachable("Don't know how to promote alloca intrinsic use."); } } return true; }
/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" calls. The /// "unwind" part of these invokes jump to a landing pad within the current /// function. This is a candidate to merge the selector associated with the URoR /// invoke with the one from the URoR's landing pad. bool DwarfEHPrepare::HandleURoRInvokes() { if (!EHCatchAllValue) { EHCatchAllValue = F->getParent()->getNamedGlobal("llvm.eh.catch.all.value"); if (!EHCatchAllValue) return false; } if (!SelectorIntrinsic) { SelectorIntrinsic = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector); if (!SelectorIntrinsic) return false; } SmallPtrSet<IntrinsicInst*, 32> Sels; SmallPtrSet<IntrinsicInst*, 32> CatchAllSels; FindAllCleanupSelectors(Sels, CatchAllSels); if (!DT) // We require DominatorTree information. return CleanupSelectors(CatchAllSels); if (!URoR) { URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow"); if (!URoR) return CleanupSelectors(CatchAllSels); } SmallPtrSet<InvokeInst*, 32> URoRInvokes; FindAllURoRInvokes(URoRInvokes); SmallPtrSet<IntrinsicInst*, 32> SelsToConvert; for (SmallPtrSet<IntrinsicInst*, 32>::iterator SI = Sels.begin(), SE = Sels.end(); SI != SE; ++SI) { const BasicBlock *SelBB = (*SI)->getParent(); for (SmallPtrSet<InvokeInst*, 32>::iterator UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) { const BasicBlock *URoRBB = (*UI)->getParent(); if (DT->dominates(SelBB, URoRBB)) { SelsToConvert.insert(*SI); break; } } } bool Changed = false; if (Sels.size() != SelsToConvert.size()) { // If we haven't been able to convert all of the clean-up selectors, then // loop through the slow way to see if they still need to be converted. 
if (!ExceptionValueIntrinsic) { ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception); if (!ExceptionValueIntrinsic) return CleanupSelectors(CatchAllSels); } for (Value::use_iterator I = ExceptionValueIntrinsic->use_begin(), E = ExceptionValueIntrinsic->use_end(); I != E; ++I) { IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(*I); if (!EHPtr || EHPtr->getParent()->getParent() != F) continue; Changed |= PromoteEHPtrStore(EHPtr); bool URoRInvoke = false; SmallPtrSet<IntrinsicInst*, 8> SelCalls; Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls); if (URoRInvoke) { // This EH pointer is being used by an invoke of an URoR instruction and // an eh.selector intrinsic call. If the eh.selector is a 'clean-up', we // need to convert it to a 'catch-all'. for (SmallPtrSet<IntrinsicInst*, 8>::iterator SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI) if (!HasCatchAllInSelector(*SI)) SelsToConvert.insert(*SI); } } } if (!SelsToConvert.empty()) { // Convert all clean-up eh.selectors, which are associated with "invokes" of // URoR calls, into catch-all eh.selectors. Changed = true; for (SmallPtrSet<IntrinsicInst*, 8>::iterator SI = SelsToConvert.begin(), SE = SelsToConvert.end(); SI != SE; ++SI) { IntrinsicInst *II = *SI; // Use the exception object pointer and the personality function // from the original selector. CallSite CS(II); IntrinsicInst::op_iterator I = CS.arg_begin(); IntrinsicInst::op_iterator E = CS.arg_end(); IntrinsicInst::op_iterator B = prior(E); // Exclude last argument if it is an integer. if (isa<ConstantInt>(B)) E = B; // Add exception object pointer (front). // Add personality function (next). // Add in any filter IDs (rest). SmallVector<Value*, 8> Args(I, E); Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator. 
CallInst *NewSelector = CallInst::Create(SelectorIntrinsic, Args.begin(), Args.end(), "eh.sel.catch.all", II); NewSelector->setTailCall(II->isTailCall()); NewSelector->setAttributes(II->getAttributes()); NewSelector->setCallingConv(II->getCallingConv()); II->replaceAllUsesWith(NewSelector); II->eraseFromParent(); } } Changed |= CleanupSelectors(CatchAllSels); return Changed; }
bool IntrinsicCleanerPass::runOnBasicBlock(BasicBlock &b, Module &M) { bool dirty = false; bool block_split=false; #if LLVM_VERSION_CODE <= LLVM_VERSION(3, 1) unsigned WordSize = TargetData.getPointerSizeInBits() / 8; #else unsigned WordSize = DataLayout.getPointerSizeInBits() / 8; #endif for (BasicBlock::iterator i = b.begin(), ie = b.end(); (i != ie) && (block_split == false);) { IntrinsicInst *ii = dyn_cast<IntrinsicInst>(&*i); // increment now since LowerIntrinsic deletion makes iterator invalid. ++i; if(ii) { switch (ii->getIntrinsicID()) { case Intrinsic::vastart: case Intrinsic::vaend: break; // Lower vacopy so that object resolution etc is handled by // normal instructions. // // FIXME: This is much more target dependent than just the word size, // however this works for x86-32 and x86-64. case Intrinsic::vacopy: { // (dst, src) -> *((i8**) dst) = *((i8**) src) Value *dst = ii->getArgOperand(0); Value *src = ii->getArgOperand(1); if (WordSize == 4) { Type *i8pp = PointerType::getUnqual(PointerType::getUnqual(Type::getInt8Ty(getGlobalContext()))); Value *castedDst = CastInst::CreatePointerCast(dst, i8pp, "vacopy.cast.dst", ii); Value *castedSrc = CastInst::CreatePointerCast(src, i8pp, "vacopy.cast.src", ii); Value *load = new LoadInst(castedSrc, "vacopy.read", ii); new StoreInst(load, castedDst, false, ii); } else { assert(WordSize == 8 && "Invalid word size!"); Type *i64p = PointerType::getUnqual(Type::getInt64Ty(getGlobalContext())); Value *pDst = CastInst::CreatePointerCast(dst, i64p, "vacopy.cast.dst", ii); Value *pSrc = CastInst::CreatePointerCast(src, i64p, "vacopy.cast.src", ii); Value *val = new LoadInst(pSrc, std::string(), ii); new StoreInst(val, pDst, ii); Value *off = ConstantInt::get(Type::getInt64Ty(getGlobalContext()), 1); pDst = GetElementPtrInst::Create(pDst, off, std::string(), ii); pSrc = GetElementPtrInst::Create(pSrc, off, std::string(), ii); val = new LoadInst(pSrc, std::string(), ii); new StoreInst(val, pDst, ii); pDst = 
GetElementPtrInst::Create(pDst, off, std::string(), ii); pSrc = GetElementPtrInst::Create(pSrc, off, std::string(), ii); val = new LoadInst(pSrc, std::string(), ii); new StoreInst(val, pDst, ii); } ii->removeFromParent(); delete ii; break; } case Intrinsic::sadd_with_overflow: case Intrinsic::ssub_with_overflow: case Intrinsic::smul_with_overflow: case Intrinsic::uadd_with_overflow: case Intrinsic::usub_with_overflow: case Intrinsic::umul_with_overflow: { IRBuilder<> builder(ii->getParent(), ii); Value *op1 = ii->getArgOperand(0); Value *op2 = ii->getArgOperand(1); Value *result = 0; Value *result_ext = 0; Value *overflow = 0; unsigned int bw = op1->getType()->getPrimitiveSizeInBits(); unsigned int bw2 = op1->getType()->getPrimitiveSizeInBits()*2; if ((ii->getIntrinsicID() == Intrinsic::uadd_with_overflow) || (ii->getIntrinsicID() == Intrinsic::usub_with_overflow) || (ii->getIntrinsicID() == Intrinsic::umul_with_overflow)) { Value *op1ext = builder.CreateZExt(op1, IntegerType::get(M.getContext(), bw2)); Value *op2ext = builder.CreateZExt(op2, IntegerType::get(M.getContext(), bw2)); Value *int_max_s = ConstantInt::get(op1->getType(), APInt::getMaxValue(bw)); Value *int_max = builder.CreateZExt(int_max_s, IntegerType::get(M.getContext(), bw2)); if (ii->getIntrinsicID() == Intrinsic::uadd_with_overflow){ result_ext = builder.CreateAdd(op1ext, op2ext); } else if (ii->getIntrinsicID() == Intrinsic::usub_with_overflow){ result_ext = builder.CreateSub(op1ext, op2ext); } else if (ii->getIntrinsicID() == Intrinsic::umul_with_overflow){ result_ext = builder.CreateMul(op1ext, op2ext); } overflow = builder.CreateICmpUGT(result_ext, int_max); } else if ((ii->getIntrinsicID() == Intrinsic::sadd_with_overflow) || (ii->getIntrinsicID() == Intrinsic::ssub_with_overflow) || (ii->getIntrinsicID() == Intrinsic::smul_with_overflow)) { Value *op1ext = builder.CreateSExt(op1, IntegerType::get(M.getContext(), bw2)); Value *op2ext = builder.CreateSExt(op2, IntegerType::get(M.getContext(), 
bw2)); Value *int_max_s = ConstantInt::get(op1->getType(), APInt::getSignedMaxValue(bw)); Value *int_min_s = ConstantInt::get(op1->getType(), APInt::getSignedMinValue(bw)); Value *int_max = builder.CreateSExt(int_max_s, IntegerType::get(M.getContext(), bw2)); Value *int_min = builder.CreateSExt(int_min_s, IntegerType::get(M.getContext(), bw2)); if (ii->getIntrinsicID() == Intrinsic::sadd_with_overflow){ result_ext = builder.CreateAdd(op1ext, op2ext); } else if (ii->getIntrinsicID() == Intrinsic::ssub_with_overflow){ result_ext = builder.CreateSub(op1ext, op2ext); } else if (ii->getIntrinsicID() == Intrinsic::smul_with_overflow){ result_ext = builder.CreateMul(op1ext, op2ext); } overflow = builder.CreateOr(builder.CreateICmpSGT(result_ext, int_max), builder.CreateICmpSLT(result_ext, int_min)); } // This trunc cound be replaced by a more general trunc replacement // that allows to detect also undefined behavior in assignments or // overflow in operation with integers whose dimension is smaller than // int's dimension, e.g. // uint8_t = uint8_t + uint8_t; // if one desires the wrapping should write // uint8_t = (uint8_t + uint8_t) & 0xFF; // before this, must check if it has side effects on other operations result = builder.CreateTrunc(result_ext, op1->getType()); Value *resultStruct = builder.CreateInsertValue(UndefValue::get(ii->getType()), result, 0); resultStruct = builder.CreateInsertValue(resultStruct, overflow, 1); ii->replaceAllUsesWith(resultStruct); ii->removeFromParent(); delete ii; dirty = true; break; } case Intrinsic::dbg_value: case Intrinsic::dbg_declare: // Remove these regardless of lower intrinsics flag. This can // be removed once IntrinsicLowering is fixed to not have bad // caches. ii->eraseFromParent(); dirty = true; break; case Intrinsic::trap: { // Intrisic instruction "llvm.trap" found. Directly lower it to // a call of the abort() function. 
Function *F = cast<Function>( M.getOrInsertFunction( "abort", Type::getVoidTy(getGlobalContext()), NULL)); F->setDoesNotReturn(); F->setDoesNotThrow(); CallInst::Create(F, Twine(), ii); new UnreachableInst(getGlobalContext(), ii); ii->eraseFromParent(); dirty = true; break; } case Intrinsic::objectsize: { // We don't know the size of an object in general so we replace // with 0 or -1 depending on the second argument to the intrinsic. assert(ii->getNumArgOperands() == 2 && "wrong number of arguments"); Value *minArg = ii->getArgOperand(1); assert(minArg && "Failed to get second argument"); ConstantInt *minArgAsInt = dyn_cast<ConstantInt>(minArg); assert(minArgAsInt && "Second arg is not a ConstantInt"); assert(minArgAsInt->getBitWidth() == 1 && "Second argument is not an i1"); Value *replacement = NULL; LLVM_TYPE_Q IntegerType *intType = dyn_cast<IntegerType>(ii->getType()); assert(intType && "intrinsic does not have integer return type"); if (minArgAsInt->isZero()) { // min=false replacement = ConstantInt::get(intType, -1, /*isSigned=*/true); } else { // min=true replacement = ConstantInt::get(intType, 0, /*isSigned=*/false); } ii->replaceAllUsesWith(replacement); ii->eraseFromParent(); dirty = true; break; } default: if (LowerIntrinsics) IL->LowerIntrinsicCall(ii); dirty = true; break; } } } return dirty; }