Value *VectorBlockGenerator::generateStrideOneLoad(ScopStmt &Stmt, const LoadInst *Load,
                                                   VectorValueMapT &ScalarMaps,
                                                   bool NegativeStride) {
  unsigned VectorWidth = getVectorWidth();
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, VectorWidth);
  unsigned Offset = NegativeStride ? VectorWidth - 1 : 0;

  Value *NewPointer = generateLocationAccessed(Stmt, Load, Pointer, ScalarMaps[Offset],
                                               GlobalMaps[Offset], VLTS[Offset]);
  Value *VectorPtr =
      Builder.CreateBitCast(NewPointer, VectorPtrType, "vector_ptr");
  LoadInst *VecLoad = Builder.CreateLoad(VectorPtr, Load->getName() + "_p_vec_full");
  if (!Aligned)
    VecLoad->setAlignment(8);

  if (NegativeStride) {
    SmallVector<Constant *, 16> Indices;
    for (int i = VectorWidth - 1; i >= 0; i--)
      Indices.push_back(ConstantInt::get(Builder.getInt32Ty(), i));
    Constant *SV = llvm::ConstantVector::get(Indices);
    Value *RevVecLoad = Builder.CreateShuffleVector(
        VecLoad, VecLoad, SV, Load->getName() + "_reverse");
    return RevVecLoad;
  }

  return VecLoad;
}
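// Illustrative sketch (not part of the pass): what the NegativeStride path above
// computes for a 4-wide float access. The vector load is taken at the lowest
// address touched (lane VectorWidth-1) and the lanes are then reversed with a
// <3,2,1,0> shuffle mask, so lane i ends up holding p[-i].
static void negativeStrideLoad4(const float *p, float out[4]) {
  float tmp[4];
  for (int i = 0; i < 4; ++i)
    tmp[i] = p[i - 3];   // the single contiguous "_p_vec_full" load
  for (int i = 0; i < 4; ++i)
    out[i] = tmp[3 - i]; // the "_reverse" shufflevector
}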
void WorklessInstrument::CreateIfElseBlock(Loop * pLoop, vector<BasicBlock *> & vecAdded) { BasicBlock * pPreHeader = pLoop->getLoopPreheader(); BasicBlock * pHeader = pLoop->getHeader(); Function * pInnerFunction = pPreHeader->getParent(); Module * pModule = pPreHeader->getParent()->getParent(); BasicBlock * pElseBody = NULL; TerminatorInst * pTerminator = NULL; BranchInst * pBranch = NULL; LoadInst * pLoad1 = NULL; LoadInst * pLoad2 = NULL; LoadInst * pLoadnumGlobalCounter = NULL; BinaryOperator * pAddOne = NULL; StoreInst * pStoreNew = NULL; CmpInst * pCmp = NULL; CallInst * pCall = NULL; StoreInst * pStore = NULL; AttributeSet emptySet; pTerminator = pPreHeader->getTerminator(); pLoadnumGlobalCounter = new LoadInst(this->numGlobalCounter, "", false, pTerminator); pLoadnumGlobalCounter->setAlignment(8); pAddOne = BinaryOperator::Create(Instruction::Add, pLoadnumGlobalCounter, this->ConstantLong1, "add", pTerminator); pStoreNew = new StoreInst(pAddOne, this->numGlobalCounter, false, pTerminator); pStoreNew->setAlignment(8); pElseBody = BasicBlock::Create(pModule->getContext(), ".else.body.CPI", pInnerFunction, 0); pLoad2 = new LoadInst(this->CURRENT_SAMPLE, "", false, pTerminator); pLoad2->setAlignment(8); pCmp = new ICmpInst(pTerminator, ICmpInst::ICMP_SLT, pAddOne, pLoad2, ""); pBranch = BranchInst::Create(pHeader, pElseBody, pCmp ); ReplaceInstWithInst(pTerminator, pBranch); pLoad1 = new LoadInst(this->SAMPLE_RATE, "", false, pElseBody); pCall = CallInst::Create(this->geo, pLoad1, "", pElseBody); pCall->setCallingConv(CallingConv::C); pCall->setTailCall(false); pCall->setAttributes(emptySet); CastInst * pCast = CastInst::CreateIntegerCast(pCall, this->LongType, true, "", pElseBody); //pBinary = BinaryOperator::Create(Instruction::Add, pLoad2, pCast, "add", pIfBody); pStore = new StoreInst(pCast, this->CURRENT_SAMPLE, false, pElseBody); pStore->setAlignment(8); pStore = new StoreInst(this->ConstantLong0, this->numGlobalCounter, false, pElseBody); pStore->setAlignment(8); pLoad1 = new LoadInst(this->numInstances, "", false, pElseBody); pLoad1->setAlignment(8); pAddOne = BinaryOperator::Create(Instruction::Add, pLoad1, this->ConstantLong1, "add", pElseBody); pStore = new StoreInst(pAddOne, this->numInstances, false, pElseBody); pStore->setAlignment(8); vecAdded.push_back(pPreHeader); vecAdded.push_back(pElseBody); }
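// Illustrative sketch (not emitted by the pass): the preheader check built above,
// expressed at the source level. numGlobalCounter, CURRENT_SAMPLE, SAMPLE_RATE and
// numInstances stand for the instrumented globals; geo() is the sampling hook and
// its C signature here is an assumption.
extern "C" int geo(int rate);          // assumed: draws the next sample distance
static long numGlobalCounter, CURRENT_SAMPLE, numInstances;
static int SAMPLE_RATE;
static bool takeOriginalLoop() {
  long counter = numGlobalCounter + 1; // the add + store of numGlobalCounter
  numGlobalCounter = counter;
  if (counter < CURRENT_SAMPLE)        // ICMP_SLT -> branch to the loop header
    return true;
  // ".else.body.CPI": draw a new sample point and count this instance
  CURRENT_SAMPLE = (long)geo(SAMPLE_RATE);
  numGlobalCounter = 0;
  numInstances += 1;
  return false;
}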
extern "C" LLVMValueRef LLVMRustBuildAtomicLoad(LLVMBuilderRef B, LLVMValueRef Source, const char *Name, LLVMAtomicOrdering Order, unsigned Alignment) { LoadInst *LI = new LoadInst(unwrap(Source), 0); LI->setAtomic(fromRust(Order)); LI->setAlignment(Alignment); return wrap(unwrap(B)->Insert(LI, Name)); }
/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible. static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, const DataLayout *DL) { User *CI = cast<User>(LI.getOperand(0)); Value *CastOp = CI->getOperand(0); PointerType *DestTy = cast<PointerType>(CI->getType()); Type *DestPTy = DestTy->getElementType(); if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) { // If the address spaces don't match, don't eliminate the cast. if (DestTy->getAddressSpace() != SrcTy->getAddressSpace()) return 0; Type *SrcPTy = SrcTy->getElementType(); if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || DestPTy->isVectorTy()) { // If the source is an array, the code below will not succeed. Check to // see if a trivial 'gep P, 0, 0' will help matters. Only do this for // constants. if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy)) if (Constant *CSrc = dyn_cast<Constant>(CastOp)) if (ASrcTy->getNumElements() != 0) { Type *IdxTy = DL ? DL->getIntPtrType(SrcTy) : Type::getInt64Ty(SrcTy->getContext()); Value *Idx = Constant::getNullValue(IdxTy); Value *Idxs[2] = { Idx, Idx }; CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs); SrcTy = cast<PointerType>(CastOp->getType()); SrcPTy = SrcTy->getElementType(); } if (IC.getDataLayout() && (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() || SrcPTy->isVectorTy()) && // Do not allow turning this into a load of an integer, which is then // casted to a pointer, this pessimizes pointer analysis a lot. (SrcPTy->isPtrOrPtrVectorTy() == LI.getType()->isPtrOrPtrVectorTy()) && IC.getDataLayout()->getTypeSizeInBits(SrcPTy) == IC.getDataLayout()->getTypeSizeInBits(DestPTy)) { // Okay, we are casting from one integer or pointer type to another of // the same size. Instead of casting the pointer before the load, cast // the result of the loaded value. LoadInst *NewLoad = IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); NewLoad->setAlignment(LI.getAlignment()); NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope()); // Now cast the result of the load. return new BitCastInst(NewLoad, LI.getType()); } } } return 0; }
llvm::Value *StorageSoa::alignedArrayLoad(llvm::Value *val)
{
   VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
   PointerType *vectorPtr = PointerType::get(vectorType, 0);

   CastInst *cast = new BitCastInst(val, vectorPtr, name("toVector"), m_block);
   LoadInst *load = new LoadInst(cast, name("alignLoad"), false, m_block);
   load->setAlignment(8);

   return load;
}
extern "C" LLVMValueRef LLVMBuildAtomicLoad(LLVMBuilderRef B, LLVMValueRef source, const char* Name, AtomicOrdering order) { LoadInst* li = new LoadInst(unwrap(source),0); li->setVolatile(true); li->setAtomic(order); li->setAlignment(sizeof(intptr_t)); return wrap(unwrap(B)->Insert(li, Name)); }
extern "C" LLVMValueRef LLVMBuildAtomicLoad(LLVMBuilderRef B, LLVMValueRef source, const char* Name, AtomicOrdering order, unsigned alignment) { LoadInst* li = new LoadInst(unwrap(source),0); li->setAtomic(order); li->setAlignment(alignment); return wrap(unwrap(B)->Insert(li, Name)); }
LLVMValueRef
mono_llvm_build_aligned_load (LLVMBuilderRef builder, LLVMValueRef PointerVal,
                              const char *Name, gboolean is_volatile, int alignment)
{
  LoadInst *ins;

  ins = unwrap(builder)->CreateLoad(unwrap(PointerVal), is_volatile, Name);
  ins->setAlignment (alignment);

  return wrap(ins);
}
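// Minimal caller-side sketch (names illustrative, not part of the binding):
// emitting a non-volatile load with an explicit 16-byte alignment hint through
// the wrapper above, given an existing builder and pointer value.
static LLVMValueRef
emit_aligned_vector_load (LLVMBuilderRef builder, LLVMValueRef vec_ptr)
{
  return mono_llvm_build_aligned_load (builder, vec_ptr, "vec", FALSE, 16);
}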
void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB) { BasicBlock::iterator IP = BB.getFirstInsertionPt(), BE = BB.end(); // Skip static allocas at the top of the entry block so they don't become // dynamic when we split the block. If we used our optimized stack layout, // then there will only be one alloca and it will come first. for (; IP != BE; ++IP) { AllocaInst *AI = dyn_cast<AllocaInst>(IP); if (!AI || !AI->isStaticAlloca()) break; } bool IsEntryBB = &BB == &F.getEntryBlock(); DebugLoc EntryLoc = IsEntryBB ? IP->getDebugLoc().getFnDebugLoc(*C) : IP->getDebugLoc(); IRBuilder<> IRB(IP); IRB.SetCurrentDebugLocation(EntryLoc); SmallVector<Value *, 1> Indices; Value *GuardP = IRB.CreateAdd( IRB.CreatePointerCast(GuardArray, IntptrTy), ConstantInt::get(IntptrTy, (1 + SanCovFunction->getNumUses()) * 4)); Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy); LoadInst *Load = IRB.CreateLoad(GuardP); Load->setAtomic(Monotonic); Load->setAlignment(4); Load->setMetadata(F.getParent()->getMDKindID("nosanitize"), MDNode::get(*C, None)); Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load); Instruction *Ins = SplitBlockAndInsertIfThen( Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); IRB.SetInsertPoint(Ins); IRB.SetCurrentDebugLocation(EntryLoc); // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC. IRB.CreateCall(SanCovFunction, GuardP); IRB.CreateCall(EmptyAsm); // Avoids callback merge. if (ClExperimentalTracing) { // Experimental support for tracing. // Insert a callback with the same guard variable as used for coverage. IRB.SetInsertPoint(IP); IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP); } }
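// Illustrative sketch (not emitted by the pass): the guard check injected above,
// expressed at the source level. The guard is the 4-byte slot addressed by GuardP
// (loaded above as a monotonic atomic load), and __sanitizer_cov is the runtime
// callback named by SanCovFunction; its C signature here is an assumption.
extern "C" void __sanitizer_cov(int *guard); // assumed signature
static inline void coverageCheck(int *guard) {
  if (*guard <= 0)            // ICmpSGE(0, Load), i.e. Load <= 0
    __sanitizer_cov(guard);   // runtime records the caller's PC
}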
Value *VectorBlockGenerator::generateStrideZeroLoad(const LoadInst *Load,
                                                    ValueMapT &BBMap) {
  const Value *Pointer = Load->getPointerOperand();
  Type *VectorPtrType = getVectorPtrTy(Pointer, 1);
  Value *NewPointer = getNewValue(Pointer, BBMap, GlobalMaps[0], VLTS[0],
                                  getLoopForInst(Load));
  Value *VectorPtr = Builder.CreateBitCast(NewPointer, VectorPtrType,
                                           Load->getName() + "_p_vec_p");
  LoadInst *ScalarLoad =
      Builder.CreateLoad(VectorPtr, Load->getName() + "_p_splat_one");

  if (!Aligned)
    ScalarLoad->setAlignment(8);

  Constant *SplatVector = Constant::getNullValue(
      VectorType::get(Builder.getInt32Ty(), getVectorWidth()));

  Value *VectorLoad = Builder.CreateShuffleVector(
      ScalarLoad, ScalarLoad, SplatVector, Load->getName() + "_p_splat");
  return VectorLoad;
}
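// Illustrative sketch: the stride-zero case above performs one scalar load and
// broadcasts it to every vector lane (the shufflevector with an all-zero mask);
// for a 4-wide float vector this is equivalent to:
static void splatLoad4(const float *p, float out[4]) {
  float s = *p;                // the single "_p_splat_one" load
  for (int i = 0; i < 4; ++i)  // the "_p_splat" broadcast
    out[i] = s;
}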
LLVMValueRef
mono_llvm_build_atomic_load (LLVMBuilderRef builder, LLVMValueRef PointerVal,
                             const char *Name, gboolean is_volatile, int alignment,
                             BarrierKind barrier)
{
  LoadInst *ins = unwrap(builder)->CreateLoad(unwrap(PointerVal), is_volatile, Name);
  ins->setAlignment (alignment);

  switch (barrier) {
  case LLVM_BARRIER_NONE:
    break;
  case LLVM_BARRIER_ACQ:
    ins->setOrdering(Acquire);
    break;
  case LLVM_BARRIER_SEQ:
    ins->setOrdering(SequentiallyConsistent);
    break;
  default:
    g_assert_not_reached ();
    break;
  }

  return wrap(ins);
}
void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx, bool UseCalls) { BasicBlock::iterator IP = BB.getFirstInsertionPt(); bool IsEntryBB = &BB == &F.getEntryBlock(); DebugLoc EntryLoc; if (IsEntryBB) { if (auto SP = F.getSubprogram()) EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP); // Keep static allocas and llvm.localescape calls in the entry block. Even // if we aren't splitting the block, it's nice for allocas to be before // calls. IP = PrepareToSplitEntryBlock(BB, IP); } else { EntryLoc = IP->getDebugLoc(); } IRBuilder<> IRB(&*IP); IRB.SetCurrentDebugLocation(EntryLoc); if (Options.TracePC) { IRB.CreateCall(SanCovTracePC); // gets the PC using GET_CALLER_PC. IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. } else if (Options.TracePCGuard) { //auto GuardVar = new GlobalVariable( // *F.getParent(), Int64Ty, false, GlobalVariable::LinkOnceODRLinkage, // Constant::getNullValue(Int64Ty), "__sancov_guard." + F.getName()); // if (auto Comdat = F.getComdat()) // GuardVar->setComdat(Comdat); // TODO: add debug into to GuardVar. // GuardVar->setSection(SanCovTracePCGuardSection); // auto GuardPtr = IRB.CreatePointerCast(GuardVar, IntptrPtrTy); auto GuardPtr = IRB.CreateIntToPtr( IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy), ConstantInt::get(IntptrTy, Idx * 4)), Int32PtrTy); if (!UseCalls) { auto GuardLoad = IRB.CreateLoad(GuardPtr); GuardLoad->setAtomic(AtomicOrdering::Monotonic); GuardLoad->setAlignment(8); SetNoSanitizeMetadata(GuardLoad); // Don't instrument with e.g. asan. auto Cmp = IRB.CreateICmpNE( GuardLoad, Constant::getNullValue(GuardLoad->getType())); auto Ins = SplitBlockAndInsertIfThen( Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); IRB.SetCurrentDebugLocation(EntryLoc); IRB.SetInsertPoint(Ins); } IRB.CreateCall(SanCovTracePCGuard, GuardPtr); IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. } else { Value *GuardP = IRB.CreateAdd( IRB.CreatePointerCast(GuardArray, IntptrTy), ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4)); GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy); if (Options.TraceBB) { IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP); } else if (UseCalls) { IRB.CreateCall(SanCovWithCheckFunction, GuardP); } else { LoadInst *Load = IRB.CreateLoad(GuardP); Load->setAtomic(AtomicOrdering::Monotonic); Load->setAlignment(4); SetNoSanitizeMetadata(Load); Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load); Instruction *Ins = SplitBlockAndInsertIfThen( Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); IRB.SetInsertPoint(Ins); IRB.SetCurrentDebugLocation(EntryLoc); // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC. IRB.CreateCall(SanCovFunction, GuardP); IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. } } if (Options.Use8bitCounters) { IRB.SetInsertPoint(&*IP); Value *P = IRB.CreateAdd( IRB.CreatePointerCast(EightBitCounterArray, IntptrTy), ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1)); P = IRB.CreateIntToPtr(P, IRB.getInt8PtrTy()); LoadInst *LI = IRB.CreateLoad(P); Value *Inc = IRB.CreateAdd(LI, ConstantInt::get(IRB.getInt8Ty(), 1)); StoreInst *SI = IRB.CreateStore(Inc, P); SetNoSanitizeMetadata(LI); SetNoSanitizeMetadata(SI); } }
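// Illustrative sketch: the TracePCGuard path above (with UseCalls == false) loads
// the per-block 32-bit guard slot (a monotonic atomic load in the pass) and calls
// the tracing hook only when the slot is non-zero. The hook signature below is an
// assumption; the real callee is whatever SanCovTracePCGuard refers to.
extern "C" void __sanitizer_cov_trace_pc_guard(unsigned *guard); // assumed
static inline void tracePCGuardCheck(unsigned *guard) {
  if (*guard != 0)                           // ICmpNE(GuardLoad, 0)
    __sanitizer_cov_trace_pc_guard(guard);   // hook derives the PC of its caller
}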
/// DoPromotion - This method actually performs the promotion of the specified /// arguments, and returns the new function. At this point, we know that it's /// safe to do so. static Function * doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, SmallPtrSetImpl<Argument *> &ByValArgsToTransform, Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>> ReplaceCallSite) { // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. FunctionType *FTy = F->getFunctionType(); std::vector<Type *> Params; using ScalarizeTable = std::set<std::pair<Type *, IndicesVector>>; // ScalarizedElements - If we are promoting a pointer that has elements // accessed out of it, keep track of which elements are accessed so that we // can add one argument for each. // // Arguments that are directly loaded will have a zero element value here, to // handle cases where there are both a direct load and GEP accesses. std::map<Argument *, ScalarizeTable> ScalarizedElements; // OriginalLoads - Keep track of a representative load instruction from the // original function so that we can tell the alias analysis implementation // what the new GEP/Load instructions we are inserting look like. // We need to keep the original loads for each argument and the elements // of the argument that are accessed. std::map<std::pair<Argument *, IndicesVector>, LoadInst *> OriginalLoads; // Attribute - Keep track of the parameter attributes for the arguments // that we are *not* promoting. For the ones that we do promote, the parameter // attributes are lost SmallVector<AttributeSet, 8> ArgAttrVec; AttributeList PAL = F->getAttributes(); // First, determine the new argument list unsigned ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgNo) { if (ByValArgsToTransform.count(&*I)) { // Simple byval argument? Just add all the struct element types. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); Params.insert(Params.end(), STy->element_begin(), STy->element_end()); ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(), AttributeSet()); ++NumByValArgsPromoted; } else if (!ArgsToPromote.count(&*I)) { // Unchanged argument Params.push_back(I->getType()); ArgAttrVec.push_back(PAL.getParamAttributes(ArgNo)); } else if (I->use_empty()) { // Dead argument (which are always marked as promotable) ++NumArgumentsDead; // There may be remaining metadata uses of the argument for things like // llvm.dbg.value. Replace them with undef. I->replaceAllUsesWith(UndefValue::get(I->getType())); } else { // Okay, this is being promoted. This means that the only uses are loads // or GEPs which are only used by loads // In this table, we will track which indices are loaded from the argument // (where direct loads are tracked as no indices). ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; for (User *U : I->users()) { Instruction *UI = cast<Instruction>(U); Type *SrcTy; if (LoadInst *L = dyn_cast<LoadInst>(UI)) SrcTy = L->getType(); else SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType(); IndicesVector Indices; Indices.reserve(UI->getNumOperands() - 1); // Since loads will only have a single operand, and GEPs only a single // non-index operand, this will record direct loads without any indices, // and gep+loads with the GEP indices. 
for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end(); II != IE; ++II) Indices.push_back(cast<ConstantInt>(*II)->getSExtValue()); // GEPs with a single 0 index can be merged with direct loads if (Indices.size() == 1 && Indices.front() == 0) Indices.clear(); ArgIndices.insert(std::make_pair(SrcTy, Indices)); LoadInst *OrigLoad; if (LoadInst *L = dyn_cast<LoadInst>(UI)) OrigLoad = L; else // Take any load, we will use it only to update Alias Analysis OrigLoad = cast<LoadInst>(UI->user_back()); OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad; } // Add a parameter to the function for each element passed in. for (const auto &ArgIndex : ArgIndices) { // not allowed to dereference ->begin() if size() is 0 Params.push_back(GetElementPtrInst::getIndexedType( cast<PointerType>(I->getType()->getScalarType())->getElementType(), ArgIndex.second)); ArgAttrVec.push_back(AttributeSet()); assert(Params.back()); } if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty()) ++NumArgumentsPromoted; else ++NumAggregatesPromoted; } } Type *RetTy = FTy->getReturnType(); // Construct the new function type using the new arguments. FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); // Create the new function body and insert it into the module. Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName()); NF->copyAttributesFrom(F); // Patch the pointer to LLVM function in debug info descriptor. NF->setSubprogram(F->getSubprogram()); F->setSubprogram(nullptr); DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" << "From: " << *F); // Recompute the parameter attributes list based on the new arguments for // the function. NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttributes(), PAL.getRetAttributes(), ArgAttrVec)); ArgAttrVec.clear(); F->getParent()->getFunctionList().insert(F->getIterator(), NF); NF->takeName(F); // Loop over all of the callers of the function, transforming the call sites // to pass in the loaded pointers. // SmallVector<Value *, 16> Args; while (!F->use_empty()) { CallSite CS(F->user_back()); assert(CS.getCalledFunction() == F); Instruction *Call = CS.getInstruction(); const AttributeList &CallPAL = CS.getAttributes(); // Loop over the operands, inserting GEP and loads in the caller as // appropriate. CallSite::arg_iterator AI = CS.arg_begin(); ArgNo = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++AI, ++ArgNo) if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { Args.push_back(*AI); // Unmodified argument ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo)); } else if (ByValArgsToTransform.count(&*I)) { // Emit a GEP and load for each element of the struct. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr}; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create( STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call); // TODO: Tell AA about the new values? Args.push_back(new LoadInst(Idx, Idx->getName() + ".val", Call)); ArgAttrVec.push_back(AttributeSet()); } } else if (!I->use_empty()) { // Non-dead argument: insert GEPs and loads as appropriate. ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; // Store the Value* version of the indices in here, but declare it now // for reuse. 
std::vector<Value *> Ops; for (const auto &ArgIndex : ArgIndices) { Value *V = *AI; LoadInst *OrigLoad = OriginalLoads[std::make_pair(&*I, ArgIndex.second)]; if (!ArgIndex.second.empty()) { Ops.reserve(ArgIndex.second.size()); Type *ElTy = V->getType(); for (auto II : ArgIndex.second) { // Use i32 to index structs, and i64 for others (pointers/arrays). // This satisfies GEP constraints. Type *IdxTy = (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext()) : Type::getInt64Ty(F->getContext())); Ops.push_back(ConstantInt::get(IdxTy, II)); // Keep track of the type we're currently indexing. if (auto *ElPTy = dyn_cast<PointerType>(ElTy)) ElTy = ElPTy->getElementType(); else ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II); } // And create a GEP to extract those indices. V = GetElementPtrInst::Create(ArgIndex.first, V, Ops, V->getName() + ".idx", Call); Ops.clear(); } // Since we're replacing a load make sure we take the alignment // of the previous load. LoadInst *newLoad = new LoadInst(V, V->getName() + ".val", Call); newLoad->setAlignment(OrigLoad->getAlignment()); // Transfer the AA info too. AAMDNodes AAInfo; OrigLoad->getAAMetadata(AAInfo); newLoad->setAAMetadata(AAInfo); Args.push_back(newLoad); ArgAttrVec.push_back(AttributeSet()); } } // Push any varargs arguments on the list. for (; AI != CS.arg_end(); ++AI, ++ArgNo) { Args.push_back(*AI); ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo)); } SmallVector<OperandBundleDef, 1> OpBundles; CS.getOperandBundlesAsDefs(OpBundles); CallSite NewCS; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), Args, OpBundles, "", Call); } else { auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", Call); NewCall->setTailCallKind(cast<CallInst>(Call)->getTailCallKind()); NewCS = NewCall; } NewCS.setCallingConv(CS.getCallingConv()); NewCS.setAttributes( AttributeList::get(F->getContext(), CallPAL.getFnAttributes(), CallPAL.getRetAttributes(), ArgAttrVec)); NewCS->setDebugLoc(Call->getDebugLoc()); uint64_t W; if (Call->extractProfTotalWeight(W)) NewCS->setProfWeight(W); Args.clear(); ArgAttrVec.clear(); // Update the callgraph to know that the callsite has been transformed. if (ReplaceCallSite) (*ReplaceCallSite)(CS, NewCS); if (!Call->use_empty()) { Call->replaceAllUsesWith(NewCS.getInstruction()); NewCS->takeName(Call); } // Finally, remove the old call from the program, reducing the use-count of // F. Call->eraseFromParent(); } const DataLayout &DL = F->getParent()->getDataLayout(); // Since we have now created the new function, splice the body of the old // function right into the new function, leaving the old rotting hulk of the // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); // Loop over the argument list, transferring uses of the old arguments over to // the new arguments, also transferring over the names as well. for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I) { if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { // If this is an unmodified argument, move the name and users over to the // new version. I->replaceAllUsesWith(&*I2); I2->takeName(&*I); ++I2; continue; } if (ByValArgsToTransform.count(&*I)) { // In the callee, we create an alloca, and store each of the new incoming // arguments into the alloca. Instruction *InsertPt = &NF->begin()->front(); // Just add all the struct element types. 
Type *AgTy = cast<PointerType>(I->getType())->getElementType(); Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr, I->getParamAlignment(), "", InsertPt); StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr}; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create( AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i), InsertPt); I2->setName(I->getName() + "." + Twine(i)); new StoreInst(&*I2++, Idx, InsertPt); } // Anything that used the arg should now use the alloca. I->replaceAllUsesWith(TheAlloca); TheAlloca->takeName(&*I); // If the alloca is used in a call, we must clear the tail flag since // the callee now uses an alloca from the caller. for (User *U : TheAlloca->users()) { CallInst *Call = dyn_cast<CallInst>(U); if (!Call) continue; Call->setTailCall(false); } continue; } if (I->use_empty()) continue; // Otherwise, if we promoted this argument, then all users are load // instructions (or GEPs with only load users), and all loads should be // using the new argument that we added. ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; while (!I->use_empty()) { if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) { assert(ArgIndices.begin()->second.empty() && "Load element should sort to front!"); I2->setName(I->getName() + ".val"); LI->replaceAllUsesWith(&*I2); LI->eraseFromParent(); DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() << "' in function '" << F->getName() << "'\n"); } else { GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back()); IndicesVector Operands; Operands.reserve(GEP->getNumIndices()); for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); II != IE; ++II) Operands.push_back(cast<ConstantInt>(*II)->getSExtValue()); // GEPs with a single 0 index can be merged with direct loads if (Operands.size() == 1 && Operands.front() == 0) Operands.clear(); Function::arg_iterator TheArg = I2; for (ScalarizeTable::iterator It = ArgIndices.begin(); It->second != Operands; ++It, ++TheArg) { assert(It != ArgIndices.end() && "GEP not handled??"); } std::string NewName = I->getName(); for (unsigned i = 0, e = Operands.size(); i != e; ++i) { NewName += "." + utostr(Operands[i]); } NewName += ".val"; TheArg->setName(NewName); DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() << "' of function '" << NF->getName() << "'\n"); // All of the uses must be load instructions. Replace them all with // the argument specified by ArgNo. while (!GEP->use_empty()) { LoadInst *L = cast<LoadInst>(GEP->user_back()); L->replaceAllUsesWith(&*TheArg); L->eraseFromParent(); } GEP->eraseFromParent(); } } // Increment I2 past all of the arguments added for this promoted pointer. std::advance(I2, ArgIndices.size()); } return NF; }
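// Illustrative sketch of what the promotion above does at the source level, for a
// callee whose pointer argument is only ever loaded from (types and names are
// examples, not taken from the pass).
struct Pair { int a, b; };
// Before promotion: the callee receives a pointer and performs the loads itself.
static int sumBefore(const Pair *P) { return P->a + P->b; }
// After promotion: the loads move to every call site and the callee receives one
// scalar argument per accessed element.
static int sumAfter(int P_a, int P_b) { return P_a + P_b; }
static int sumCaller(const Pair *P) { return sumAfter(P->a, P->b); }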
/// tryAggregating - When scanning forward over instructions, we look for /// other loads or stores that could be aggregated with this one. /// Returns the last instruction added (if one was added) since we might have /// removed some loads or stores and that might invalidate an iterator. Instruction *AggregateGlobalOpsOpt::tryAggregating(Instruction *StartInst, Value *StartPtr, bool DebugThis) { if (TD == 0) return 0; Module* M = StartInst->getParent()->getParent()->getParent(); LLVMContext& Context = StartInst->getContext(); Type* int8Ty = Type::getInt8Ty(Context); Type* sizeTy = Type::getInt64Ty(Context); Type* globalInt8PtrTy = int8Ty->getPointerTo(globalSpace); bool isLoad = isa<LoadInst>(StartInst); bool isStore = isa<StoreInst>(StartInst); Instruction *lastAddedInsn = NULL; Instruction *LastLoadOrStore = NULL; SmallVector<Instruction*, 8> toRemove; // Okay, so we now have a single global load/store. Scan to find // all subsequent stores of the same value to offset from the same pointer. // Join these together into ranges, so we can decide whether contiguous blocks // are stored. MemOpRanges Ranges(*TD); // Put the first store in since we want to preserve the order. Ranges.addInst(0, StartInst); BasicBlock::iterator BI = StartInst; for (++BI; !isa<TerminatorInst>(BI); ++BI) { if( isGlobalLoadOrStore(BI, globalSpace, isLoad, isStore) ) { // OK! } else { // If the instruction is readnone, ignore it, otherwise bail out. We // don't even allow readonly here because we don't want something like: // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). if (BI->mayWriteToMemory()) break; if (isStore && BI->mayReadFromMemory()) break; continue; } if ( isStore && isa<StoreInst>(BI) ) { StoreInst *NextStore = cast<StoreInst>(BI); // If this is a store, see if we can merge it in. if (!NextStore->isSimple()) break; // Check to see if this store is to a constant offset from the start ptr. int64_t Offset; if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD)) break; Ranges.addStore(Offset, NextStore); LastLoadOrStore = NextStore; } else { LoadInst *NextLoad = cast<LoadInst>(BI); if (!NextLoad->isSimple()) break; // Check to see if this load is to a constant offset from the start ptr. int64_t Offset; if (!IsPointerOffset(StartPtr, NextLoad->getPointerOperand(), Offset, *TD)) break; Ranges.addLoad(Offset, NextLoad); LastLoadOrStore = NextLoad; } } // If we have no ranges, then we just had a single store with nothing that // could be merged in. This is a very common case of course. if (!Ranges.moreThanOneOp()) return 0; // Divide the instructions between StartInst and LastLoadOrStore into // addressing, memops, and uses of memops (uses of loads) reorderAddressingMemopsUses(StartInst, LastLoadOrStore, DebugThis); Instruction* insertBefore = StartInst; IRBuilder<> builder(insertBefore); // Now that we have full information about ranges, loop over the ranges and // emit memcpy's for anything big enough to be worthwhile. for (MemOpRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { const MemOpRange &Range = *I; Value* oldBaseI = NULL; Value* newBaseI = NULL; if (Range.TheStores.size() == 1) continue; // Don't bother if there's only one thing... builder.SetInsertPoint(insertBefore); // Otherwise, we do want to transform this! Create a new memcpy. // Get the starting pointer of the block. 
StartPtr = Range.StartPtr; if( DebugThis ) { errs() << "base is:"; StartPtr->dump(); } // Determine alignment unsigned Alignment = Range.Alignment; if (Alignment == 0) { Type *EltType = cast<PointerType>(StartPtr->getType())->getElementType(); Alignment = TD->getABITypeAlignment(EltType); } Instruction *alloc = NULL; Value *globalPtr = NULL; // create temporary alloca space to communicate to/from. alloc = makeAlloca(int8Ty, "agg.tmp", insertBefore, Range.End-Range.Start, Alignment); // Generate the old and new base pointers before we output // anything else. { Type* iPtrTy = TD->getIntPtrType(alloc->getType()); Type* iNewBaseTy = TD->getIntPtrType(alloc->getType()); oldBaseI = builder.CreatePtrToInt(StartPtr, iPtrTy, "agg.tmp.oldb.i"); newBaseI = builder.CreatePtrToInt(alloc, iNewBaseTy, "agg.tmp.newb.i"); } // If storing, do the stores we had into our alloca'd region. if( isStore ) { for (SmallVector<Instruction*, 16>::const_iterator SI = Range.TheStores.begin(), SE = Range.TheStores.end(); SI != SE; ++SI) { StoreInst* oldStore = cast<StoreInst>(*SI); if( DebugThis ) { errs() << "have store in range:"; oldStore->dump(); } Value* ptrToAlloc = rebasePointer(oldStore->getPointerOperand(), StartPtr, alloc, "agg.tmp", &builder, *TD, oldBaseI, newBaseI); // Old load must not be volatile or atomic... or we shouldn't have put // it in ranges assert(!(oldStore->isVolatile() || oldStore->isAtomic())); StoreInst* newStore = builder.CreateStore(oldStore->getValueOperand(), ptrToAlloc); newStore->setAlignment(oldStore->getAlignment()); newStore->takeName(oldStore); } } // cast the pointer that was load/stored to i8 if necessary. if( StartPtr->getType()->getPointerElementType() == int8Ty ) { globalPtr = StartPtr; } else { globalPtr = builder.CreatePointerCast(StartPtr, globalInt8PtrTy, "agg.cast"); } // Get a Constant* for the length. Constant* len = ConstantInt::get(sizeTy, Range.End-Range.Start, false); // Now add the memcpy instruction unsigned addrSpaceDst,addrSpaceSrc; addrSpaceDst = addrSpaceSrc = 0; if( isStore ) addrSpaceDst = globalSpace; if( isLoad ) addrSpaceSrc = globalSpace; Type *types[3]; types[0] = PointerType::get(int8Ty, addrSpaceDst); types[1] = PointerType::get(int8Ty, addrSpaceSrc); types[2] = sizeTy; Function *func = Intrinsic::getDeclaration(M, Intrinsic::memcpy, types); Value* args[5]; // dst src len alignment isvolatile if( isStore ) { // it's a store (ie put) args[0] = globalPtr; args[1] = alloc; } else { // it's a load (ie get) args[0] = alloc; args[1] = globalPtr; } args[2] = len; // alignment args[3] = ConstantInt::get(Type::getInt32Ty(Context), 0, false); // isvolatile args[4] = ConstantInt::get(Type::getInt1Ty(Context), 0, false); Instruction* aMemCpy = builder.CreateCall(func, args); /* DEBUG(dbgs() << "Replace ops:\n"; for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) dbgs() << *Range.TheStores[i] << '\n'; dbgs() << "With: " << *AMemSet << '\n'); */ if (!Range.TheStores.empty()) aMemCpy->setDebugLoc(Range.TheStores[0]->getDebugLoc()); lastAddedInsn = aMemCpy; // If loading, load from the memcpy'd region if( isLoad ) { for (SmallVector<Instruction*, 16>::const_iterator SI = Range.TheStores.begin(), SE = Range.TheStores.end(); SI != SE; ++SI) { LoadInst* oldLoad = cast<LoadInst>(*SI); if( DebugThis ) { errs() << "have load in range:"; oldLoad->dump(); } Value* ptrToAlloc = rebasePointer(oldLoad->getPointerOperand(), StartPtr, alloc, "agg.tmp", &builder, *TD, oldBaseI, newBaseI); // Old load must not be volatile or atomic... 
        // or we shouldn't have put it in ranges
        assert(!(oldLoad->isVolatile() || oldLoad->isAtomic()));
        LoadInst* newLoad = builder.CreateLoad(ptrToAlloc);
        newLoad->setAlignment(oldLoad->getAlignment());
        oldLoad->replaceAllUsesWith(newLoad);
        newLoad->takeName(oldLoad);
        lastAddedInsn = newLoad;
      }
    }

    // Save old loads/stores for removal
    for (SmallVector<Instruction*, 16>::const_iterator
             SI = Range.TheStores.begin(), SE = Range.TheStores.end();
         SI != SE; ++SI) {
      Instruction* insn = *SI;
      toRemove.push_back(insn);
    }
  }

  // Zap all the old loads/stores
  for (SmallVector<Instruction*, 16>::const_iterator
           SI = toRemove.begin(), SE = toRemove.end();
       SI != SE; ++SI) {
    (*SI)->eraseFromParent();
  }

  return lastAddedInsn;
}
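// Illustrative sketch of the aggregation performed above for a run of adjacent
// stores into the global address space: the individual stores are redirected into
// a stack temporary ("agg.tmp") and a single memcpy (standing in for the
// llvm.memcpy intrinsic emitted by the pass) transfers the whole range at once.
#include <cstring>
static void putThree(int *globalA, int x, int y, int z) {
  int tmp[3];                              // the alloca'd staging buffer
  tmp[0] = x; tmp[1] = y; tmp[2] = z;      // the original stores, rebased onto tmp
  std::memcpy(globalA, tmp, sizeof tmp);   // one wide transfer instead of three
}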
// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the // upgraded intrinsic. All argument and return casting must be provided in // order to seamlessly integrate with existing context. void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Function *F = CI->getCalledFunction(); LLVMContext &C = CI->getContext(); ImmutableCallSite CS(CI); assert(F && "CallInst has no function associated with it."); if (!NewFn) { if (F->getName() == "llvm.x86.sse.loadu.ps" || F->getName() == "llvm.x86.sse2.loadu.dq" || F->getName() == "llvm.x86.sse2.loadu.pd") { // Convert to a native, unaligned load. const Type *VecTy = CI->getType(); const Type *IntTy = IntegerType::get(C, 128); IRBuilder<> Builder(C); Builder.SetInsertPoint(CI->getParent(), CI); Value *BC = Builder.CreateBitCast(CI->getArgOperand(0), PointerType::getUnqual(IntTy), "cast"); LoadInst *LI = Builder.CreateLoad(BC, CI->getName()); LI->setAlignment(1); // Unaligned load. BC = Builder.CreateBitCast(LI, VecTy, "new.cast"); // Fix up all the uses with our new load. if (!CI->use_empty()) CI->replaceAllUsesWith(BC); // Remove intrinsic. CI->eraseFromParent(); } else if (F->getName() == "llvm.x86.sse.movnt.ps" || F->getName() == "llvm.x86.sse2.movnt.dq" || F->getName() == "llvm.x86.sse2.movnt.pd" || F->getName() == "llvm.x86.sse2.movnt.i") { IRBuilder<> Builder(C); Builder.SetInsertPoint(CI->getParent(), CI); Module *M = F->getParent(); SmallVector<Value *, 1> Elts; Elts.push_back(ConstantInt::get(Type::getInt32Ty(C), 1)); MDNode *Node = MDNode::get(C, Elts); Value *Arg0 = CI->getArgOperand(0); Value *Arg1 = CI->getArgOperand(1); // Convert the type of the pointer to a pointer to the stored type. Value *BC = Builder.CreateBitCast(Arg0, PointerType::getUnqual(Arg1->getType()), "cast"); StoreInst *SI = Builder.CreateStore(Arg1, BC); SI->setMetadata(M->getMDKindID("nontemporal"), Node); SI->setAlignment(16); // Remove intrinsic. CI->eraseFromParent(); } else { llvm_unreachable("Unknown function for CallInst upgrade."); } return; } switch (NewFn->getIntrinsicID()) { case Intrinsic::prefetch: { IRBuilder<> Builder(C); Builder.SetInsertPoint(CI->getParent(), CI); const llvm::Type *I32Ty = llvm::Type::getInt32Ty(CI->getContext()); // Add the extra "data cache" argument Value *Operands[4] = { CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), llvm::ConstantInt::get(I32Ty, 1) }; CallInst *NewCI = CallInst::Create(NewFn, Operands, CI->getName(), CI); NewCI->setTailCall(CI->isTailCall()); NewCI->setCallingConv(CI->getCallingConv()); // Handle any uses of the old CallInst. if (!CI->use_empty()) // Replace all uses of the old call with the new cast which has the // correct type. CI->replaceAllUsesWith(NewCI); // Clean up the old call now that it has been completely upgraded. CI->eraseFromParent(); break; } } }
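// Illustrative sketch: the loadu.* upgrade above replaces the intrinsic call with
// a plain 128-bit load at alignment 1 followed by a bitcast back to the vector
// type; in C terms it is just an unaligned 16-byte copy (types are examples).
#include <cstring>
struct V4F { float lane[4]; };
static V4F loaduEquivalent(const float *p) {
  V4F r;
  std::memcpy(&r, p, sizeof r);   // unaligned load (alignment 1)
  return r;
}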
/// DoPromotion - This method actually performs the promotion of the specified /// arguments, and returns the new function. At this point, we know that it's /// safe to do so. CallGraphNode *ArgPromotion::DoPromotion(Function *F, SmallPtrSet<Argument*, 8> &ArgsToPromote, SmallPtrSet<Argument*, 8> &ByValArgsToTransform) { // Start by computing a new prototype for the function, which is the same as // the old function, but has modified arguments. const FunctionType *FTy = F->getFunctionType(); std::vector<const Type*> Params; typedef std::set<IndicesVector> ScalarizeTable; // ScalarizedElements - If we are promoting a pointer that has elements // accessed out of it, keep track of which elements are accessed so that we // can add one argument for each. // // Arguments that are directly loaded will have a zero element value here, to // handle cases where there are both a direct load and GEP accesses. // std::map<Argument*, ScalarizeTable> ScalarizedElements; // OriginalLoads - Keep track of a representative load instruction from the // original function so that we can tell the alias analysis implementation // what the new GEP/Load instructions we are inserting look like. std::map<IndicesVector, LoadInst*> OriginalLoads; // Attributes - Keep track of the parameter attributes for the arguments // that we are *not* promoting. For the ones that we do promote, the parameter // attributes are lost SmallVector<AttributeWithIndex, 8> AttributesVec; const AttrListPtr &PAL = F->getAttributes(); // Add any return attributes. if (Attributes attrs = PAL.getRetAttributes()) AttributesVec.push_back(AttributeWithIndex::get(0, attrs)); // First, determine the new argument list unsigned ArgIndex = 1; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgIndex) { if (ByValArgsToTransform.count(I)) { // Simple byval argument? Just add all the struct element types. const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); const StructType *STy = cast<StructType>(AgTy); for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) Params.push_back(STy->getElementType(i)); ++NumByValArgsPromoted; } else if (!ArgsToPromote.count(I)) { // Unchanged argument Params.push_back(I->getType()); if (Attributes attrs = PAL.getParamAttributes(ArgIndex)) AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs)); } else if (I->use_empty()) { // Dead argument (which are always marked as promotable) ++NumArgumentsDead; } else { // Okay, this is being promoted. This means that the only uses are loads // or GEPs which are only used by loads // In this table, we will track which indices are loaded from the argument // (where direct loads are tracked as no indices). ScalarizeTable &ArgIndices = ScalarizedElements[I]; for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) { Instruction *User = cast<Instruction>(*UI); assert(isa<LoadInst>(User) || isa<GetElementPtrInst>(User)); IndicesVector Indices; Indices.reserve(User->getNumOperands() - 1); // Since loads will only have a single operand, and GEPs only a single // non-index operand, this will record direct loads without any indices, // and gep+loads with the GEP indices. 
for (User::op_iterator II = User->op_begin() + 1, IE = User->op_end(); II != IE; ++II) Indices.push_back(cast<ConstantInt>(*II)->getSExtValue()); // GEPs with a single 0 index can be merged with direct loads if (Indices.size() == 1 && Indices.front() == 0) Indices.clear(); ArgIndices.insert(Indices); LoadInst *OrigLoad; if (LoadInst *L = dyn_cast<LoadInst>(User)) OrigLoad = L; else // Take any load, we will use it only to update Alias Analysis OrigLoad = cast<LoadInst>(User->use_back()); OriginalLoads[Indices] = OrigLoad; } // Add a parameter to the function for each element passed in. for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { // not allowed to dereference ->begin() if size() is 0 Params.push_back(GetElementPtrInst::getIndexedType(I->getType(), SI->begin(), SI->end())); assert(Params.back()); } if (ArgIndices.size() == 1 && ArgIndices.begin()->empty()) ++NumArgumentsPromoted; else ++NumAggregatesPromoted; } } // Add any function attributes. if (Attributes attrs = PAL.getFnAttributes()) AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); const Type *RetTy = FTy->getReturnType(); // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which // have zero fixed arguments. bool ExtraArgHack = false; if (Params.empty() && FTy->isVarArg()) { ExtraArgHack = true; Params.push_back(Type::getInt32Ty(F->getContext())); } // Construct the new function type using the new arguments. FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); // Create the new function body and insert it into the module. Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName()); NF->copyAttributesFrom(F); DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" << "From: " << *F); // Recompute the parameter attributes list based on the new arguments for // the function. NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); AttributesVec.clear(); F->getParent()->getFunctionList().insert(F, NF); NF->takeName(F); // Get the alias analysis information that we need to update to reflect our // changes. AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); // Get the callgraph information that we need to update to reflect our // changes. CallGraph &CG = getAnalysis<CallGraph>(); // Get a new callgraph node for NF. CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); // Loop over all of the callers of the function, transforming the call sites // to pass in the loaded pointers. // SmallVector<Value*, 16> Args; while (!F->use_empty()) { CallSite CS = CallSite::get(F->use_back()); assert(CS.getCalledFunction() == F); Instruction *Call = CS.getInstruction(); const AttrListPtr &CallPAL = CS.getAttributes(); // Add any return attributes. if (Attributes attrs = CallPAL.getRetAttributes()) AttributesVec.push_back(AttributeWithIndex::get(0, attrs)); // Loop over the operands, inserting GEP and loads in the caller as // appropriate. CallSite::arg_iterator AI = CS.arg_begin(); ArgIndex = 1; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++AI, ++ArgIndex) if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { Args.push_back(*AI); // Unmodified argument if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex)) AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); } else if (ByValArgsToTransform.count(I)) { // Emit a GEP and load for each element of the struct. 
const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); const StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2, (*AI)->getName()+"."+utostr(i), Call); // TODO: Tell AA about the new values? Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call)); } } else if (!I->use_empty()) { // Non-dead argument: insert GEPs and loads as appropriate. ScalarizeTable &ArgIndices = ScalarizedElements[I]; // Store the Value* version of the indices in here, but declare it now // for reuse. std::vector<Value*> Ops; for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { Value *V = *AI; LoadInst *OrigLoad = OriginalLoads[*SI]; if (!SI->empty()) { Ops.reserve(SI->size()); const Type *ElTy = V->getType(); for (IndicesVector::const_iterator II = SI->begin(), IE = SI->end(); II != IE; ++II) { // Use i32 to index structs, and i64 for others (pointers/arrays). // This satisfies GEP constraints. const Type *IdxTy = (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext()) : Type::getInt64Ty(F->getContext())); Ops.push_back(ConstantInt::get(IdxTy, *II)); // Keep track of the type we're currently indexing. ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II); } // And create a GEP to extract those indices. V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(), V->getName()+".idx", Call); Ops.clear(); AA.copyValue(OrigLoad->getOperand(0), V); } // Since we're replacing a load make sure we take the alignment // of the previous load. LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call); newLoad->setAlignment(OrigLoad->getAlignment()); Args.push_back(newLoad); AA.copyValue(OrigLoad, Args.back()); } } if (ExtraArgHack) Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext()))); // Push any varargs arguments on the list. for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { Args.push_back(*AI); if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex)) AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs)); } // Add any function attributes. if (Attributes attrs = CallPAL.getFnAttributes()) AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), Args.begin(), Args.end(), "", Call); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); } else { New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); cast<CallInst>(New)->setCallingConv(CS.getCallingConv()); cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); if (cast<CallInst>(Call)->isTailCall()) cast<CallInst>(New)->setTailCall(); } Args.clear(); AttributesVec.clear(); // Update the alias analysis implementation to know that we are replacing // the old call with a new one. AA.replaceWithNewValue(Call, New); // Update the callgraph to know that the callsite has been transformed. 
CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; CalleeNode->replaceCallEdge(Call, New, NF_CGN); if (!Call->use_empty()) { Call->replaceAllUsesWith(New); New->takeName(Call); } // Finally, remove the old call from the program, reducing the use-count of // F. Call->eraseFromParent(); } // Since we have now created the new function, splice the body of the old // function right into the new function, leaving the old rotting hulk of the // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); // Loop over the argument list, transfering uses of the old arguments over to // the new arguments, also transfering over the names as well. // for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I) { if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { // If this is an unmodified argument, move the name and users over to the // new version. I->replaceAllUsesWith(I2); I2->takeName(I); AA.replaceWithNewValue(I, I2); ++I2; continue; } if (ByValArgsToTransform.count(I)) { // In the callee, we create an alloca, and store each of the new incoming // arguments into the alloca. Instruction *InsertPt = NF->begin()->begin(); // Just add all the struct element types. const Type *AgTy = cast<PointerType>(I->getType())->getElementType(); Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt); const StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2, TheAlloca->getName()+"."+Twine(i), InsertPt); I2->setName(I->getName()+"."+Twine(i)); new StoreInst(I2++, Idx, InsertPt); } // Anything that used the arg should now use the alloca. I->replaceAllUsesWith(TheAlloca); TheAlloca->takeName(I); AA.replaceWithNewValue(I, TheAlloca); continue; } if (I->use_empty()) { AA.deleteValue(I); continue; } // Otherwise, if we promoted this argument, then all users are load // instructions (or GEPs with only load users), and all loads should be // using the new argument that we added. ScalarizeTable &ArgIndices = ScalarizedElements[I]; while (!I->use_empty()) { if (LoadInst *LI = dyn_cast<LoadInst>(I->use_back())) { assert(ArgIndices.begin()->empty() && "Load element should sort to front!"); I2->setName(I->getName()+".val"); LI->replaceAllUsesWith(I2); AA.replaceWithNewValue(LI, I2); LI->eraseFromParent(); DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() << "' in function '" << F->getName() << "'\n"); } else { GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back()); IndicesVector Operands; Operands.reserve(GEP->getNumIndices()); for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); II != IE; ++II) Operands.push_back(cast<ConstantInt>(*II)->getSExtValue()); // GEPs with a single 0 index can be merged with direct loads if (Operands.size() == 1 && Operands.front() == 0) Operands.clear(); Function::arg_iterator TheArg = I2; for (ScalarizeTable::iterator It = ArgIndices.begin(); *It != Operands; ++It, ++TheArg) { assert(It != ArgIndices.end() && "GEP not handled??"); } std::string NewName = I->getName(); for (unsigned i = 0, e = Operands.size(); i != e; ++i) { NewName += "." 
+ utostr(Operands[i]); } NewName += ".val"; TheArg->setName(NewName); DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName() << "' of function '" << NF->getName() << "'\n"); // All of the uses must be load instructions. Replace them all with // the argument specified by ArgNo. while (!GEP->use_empty()) { LoadInst *L = cast<LoadInst>(GEP->use_back()); L->replaceAllUsesWith(TheArg); AA.replaceWithNewValue(L, TheArg); L->eraseFromParent(); } AA.deleteValue(GEP); GEP->eraseFromParent(); } } // Increment I2 past all of the arguments added for this promoted pointer. for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i) ++I2; } // Notify the alias analysis implementation that we inserted a new argument. if (ExtraArgHack) AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())), NF->arg_begin()); // Tell the alias analysis that the old function is about to disappear. AA.replaceWithNewValue(F, NF); NF_CGN->stealCalledFunctionsFrom(CG[F]); // Now that the old function is dead, delete it. If there is a dangling // reference to the CallgraphNode, just leave the dead function around for // someone else to nuke. CallGraphNode *CGN = CG[F]; if (CGN->getNumReferences() == 0) delete CG.removeFunctionFromModule(CGN); else F->setLinkage(Function::ExternalLinkage); return NF_CGN; }
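// Illustrative sketch of the byval path handled above: the struct argument is
// exploded into one scalar argument per element at every call site, and the
// callee rebuilds it into a local alloca (types and names are examples only).
struct Vec2 { float x, y; };
// Before: the struct is passed byval (a hidden pointer to a caller-side copy).
static float len2Before(Vec2 V) { return V.x * V.x + V.y * V.y; }
// After: one scalar parameter per element, stored back into a callee-side local.
static float len2After(float x, float y) {
  Vec2 V = {x, y};                 // the alloca recreated in the callee
  return V.x * V.x + V.y * V.y;
}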
void WorklessInstrument::InstrumentWorkless0Star1(Module * pModule, Loop * pLoop) { Function * pMain = NULL; if(strMainName != "" ) { pMain = pModule->getFunction(strMainName.c_str()); } else { pMain = pModule->getFunction("main"); } LoadInst * pLoad; BinaryOperator* pAdd = NULL; StoreInst * pStore = NULL; for (Function::iterator BB = pMain->begin(); BB != pMain->end(); ++BB) { if(BB->getName().equals("entry")) { CallInst * pCall; StoreInst * pStore; Instruction * II = BB->begin(); pCall = CallInst::Create(this->InitHooks, "", II); pCall->setCallingConv(CallingConv::C); pCall->setTailCall(false); AttributeSet emptySet; pCall->setAttributes(emptySet); pCall = CallInst::Create(this->getenv, this->SAMPLE_RATE_ptr, "", II); pCall->setCallingConv(CallingConv::C); pCall->setTailCall(false); AttributeSet AS; { SmallVector<AttributeSet, 4> Attrs; AttributeSet PAS; { AttrBuilder B; B.addAttribute(Attribute::NoUnwind); PAS = AttributeSet::get(pModule->getContext(), ~0U, B); } Attrs.push_back(PAS); AS = AttributeSet::get(pModule->getContext(), Attrs); } pCall->setAttributes(AS); pCall = CallInst::Create(this->function_atoi, pCall, "", II); pCall->setCallingConv(CallingConv::C); pCall->setTailCall(false); { SmallVector<AttributeSet, 4> Attrs; AttributeSet PAS; { AttrBuilder B; B.addAttribute(Attribute::NoUnwind); B.addAttribute(Attribute::ReadOnly); PAS = AttributeSet::get(pModule->getContext(), ~0U, B); } Attrs.push_back(PAS); AS = AttributeSet::get(pModule->getContext(), Attrs); } pCall->setAttributes(AS); pStore = new StoreInst(pCall, this->SAMPLE_RATE, false, II); pStore->setAlignment(4); pCall = CallInst::Create(this->geo, pCall, "", II); pCall->setCallingConv(CallingConv::C); pCall->setTailCall(false); pCall->setAttributes(emptySet); CastInst * pCast = CastInst::CreateIntegerCast(pCall, this->LongType, true, "", II); pStore = new StoreInst(pCast, this->CURRENT_SAMPLE, false, II); pStore->setAlignment(8); vector<Value *> vecParam; vecParam.push_back(this->Output_Format_String); vecParam.push_back(pCall); pCall = CallInst::Create(this->printf, vecParam, "", II); pCall->setCallingConv(CallingConv::C); pCall->setTailCall(false); pCall->setAttributes(emptySet); break; } } for (Function::iterator BB = pMain->begin(); BB != pMain->end(); ++BB) { for (BasicBlock::iterator Ins = BB->begin(); Ins != BB->end(); ++Ins) { if (isa<ReturnInst>(Ins) || isa<ResumeInst>(Ins)) { vector<Value*> vecParams; pLoad = new LoadInst(numIterations, "", false, Ins); pLoad->setAlignment(8); vecParams.push_back(pLoad); pLoad = new LoadInst(numInstances, "", false, Ins); pLoad->setAlignment(8); vecParams.push_back(pLoad); CallInst* pCall = CallInst::Create(this->PrintLoopInfo, vecParams, "", Ins); pCall->setCallingConv(CallingConv::C); pCall->setTailCall(false); AttributeSet aSet; pCall->setAttributes(aSet); } else if(isa<CallInst>(Ins) || isa<InvokeInst>(Ins)) { CallSite cs(Ins); Function * pCalled = cs.getCalledFunction(); if(pCalled == NULL) { continue; } if(pCalled->getName() == "exit" || pCalled->getName() == "_ZL9mysql_endi") { vector<Value*> vecParams; pLoad = new LoadInst(numIterations, "", false, Ins); pLoad->setAlignment(8); vecParams.push_back(pLoad); pLoad = new LoadInst(numInstances, "", false, Ins); pLoad->setAlignment(8); vecParams.push_back(pLoad); CallInst* pCall = CallInst::Create(this->PrintLoopInfo, vecParams, "", Ins); pCall->setCallingConv(CallingConv::C); pCall->setTailCall(false); AttributeSet aSet; pCall->setAttributes(aSet); } } } } BasicBlock * pHeader = pLoop->getHeader(); set<BasicBlock *> 
setExitBlock; CollectExitBlock(pLoop, setExitBlock); vector<BasicBlock *> vecAdded; CreateIfElseBlock(pLoop, vecAdded); ValueToValueMapTy VMap; set<BasicBlock *> setCloned; CloneInnerLoop(pLoop, vecAdded, VMap, setCloned); BasicBlock * pPreHeader = vecAdded[1]; pLoad = new LoadInst(this->numIterations, "", false, pPreHeader->getTerminator()); pLoad->setAlignment(8); BasicBlock * pClonedHeader = cast<BasicBlock>(VMap[pHeader]); set<BasicBlock *> setPredBlocks; for(pred_iterator PI = pred_begin(pClonedHeader), E = pred_end(pClonedHeader); PI != E; ++PI) { setPredBlocks.insert(*PI); } PHINode * pNew = PHINode::Create(pLoad->getType(), setPredBlocks.size(), "numIterations", pClonedHeader->getFirstInsertionPt()); pAdd = BinaryOperator::Create(Instruction::Add, pNew, this->ConstantLong1, "add", pClonedHeader->getFirstInsertionPt()); set<BasicBlock *>::iterator itSetBegin = setPredBlocks.begin(); set<BasicBlock *>::iterator itSetEnd = setPredBlocks.end(); for(; itSetBegin != itSetEnd; itSetBegin ++ ) { if((*itSetBegin) == pPreHeader) { pNew->addIncoming(pLoad, pPreHeader); } else { pNew->addIncoming(pAdd, *itSetBegin); } } itSetBegin = setExitBlock.begin(); itSetEnd = setExitBlock.end(); for(; itSetBegin != itSetEnd; itSetBegin ++ ) { SmallVector<BasicBlock*, 8> LoopBlocks; for(pred_iterator PI = pred_begin(*itSetBegin), E = pred_end(*itSetBegin); PI != E; ++PI) { if(setCloned.find(*PI) != setCloned.end()) { LoopBlocks.push_back(*PI); } } BasicBlock * NewExitBB = SplitBlockPredecessors(*itSetBegin, LoopBlocks, ".WL.loopexit", this); pStore = new StoreInst(pAdd, this->numIterations, false, NewExitBB->getFirstInsertionPt()); pStore->setAlignment(8); } pPreHeader->getParent()->dump(); }
/// PromoteAliasSet - Try to promote memory values to scalars by sinking /// stores out of the loop and moving loads to before the loop. We do this by /// looping over the stores in the loop, looking for stores to Must pointers /// which are loop invariant. /// void LICM::PromoteAliasSet(AliasSet &AS) { // We can promote this alias set if it has a store, if it is a "Must" alias // set, if the pointer is loop invariant, and if we are not eliminating any // volatile loads or stores. if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() || AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue())) return; assert(!AS.empty() && "Must alias set should have at least one pointer element in it!"); Value *SomePtr = AS.begin()->getValue(); // It isn't safe to promote a load/store from the loop if the load/store is // conditional. For example, turning: // // for () { if (c) *P += 1; } // // into: // // tmp = *P; for () { if (c) tmp +=1; } *P = tmp; // // is not safe, because *P may only be valid to access if 'c' is true. // // It is safe to promote P if all uses are direct load/stores and if at // least one is guaranteed to be executed. bool GuaranteedToExecute = false; SmallVector<Instruction*, 64> LoopUses; SmallPtrSet<Value*, 4> PointerMustAliases; // We start with an alignment of one and try to find instructions that allow // us to prove better alignment. unsigned Alignment = 1; // Check that all of the pointers in the alias set have the same type. We // cannot (yet) promote a memory location that is loaded and stored in // different sizes. for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) { Value *ASIV = ASI->getValue(); PointerMustAliases.insert(ASIV); // Check that all of the pointers in the alias set have the same type. We // cannot (yet) promote a memory location that is loaded and stored in // different sizes. if (SomePtr->getType() != ASIV->getType()) return; for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end(); UI != UE; ++UI) { // Ignore instructions that are outside the loop. Instruction *Use = dyn_cast<Instruction>(*UI); if (!Use || !CurLoop->contains(Use)) continue; // If there is an non-load/store instruction in the loop, we can't promote // it. if (LoadInst *load = dyn_cast<LoadInst>(Use)) { assert(!load->isVolatile() && "AST broken"); if (!load->isSimple()) return; } else if (StoreInst *store = dyn_cast<StoreInst>(Use)) { // Stores *of* the pointer are not interesting, only stores *to* the // pointer. if (Use->getOperand(1) != ASIV) continue; assert(!store->isVolatile() && "AST broken"); if (!store->isSimple()) return; // Note that we only check GuaranteedToExecute inside the store case // so that we do not introduce stores where they did not exist before // (which would break the LLVM concurrency model). // If the alignment of this instruction allows us to specify a more // restrictive (and performant) alignment and if we are sure this // instruction will be executed, update the alignment. // Larger is better, with the exception of 0 being the best alignment. unsigned InstAlignment = store->getAlignment(); if ((InstAlignment > Alignment || InstAlignment == 0) && (Alignment != 0)) if (isGuaranteedToExecute(*Use)) { GuaranteedToExecute = true; Alignment = InstAlignment; } if (!GuaranteedToExecute) GuaranteedToExecute = isGuaranteedToExecute(*Use); } else return; // Not a load or store. LoopUses.push_back(Use); } } // If there isn't a guaranteed-to-execute instruction, we can't promote. 
if (!GuaranteedToExecute) return; // Otherwise, this is safe to promote, let's do it! DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr << '\n'); Changed = true; ++NumPromoted; // Grab a debug location for the inserted loads/stores; given that the // inserted loads/stores have little relation to the original loads/stores, // this code just arbitrarily picks a location from one, since any debug // location is better than none. DebugLoc DL = LoopUses[0]->getDebugLoc(); SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); // We use the SSAUpdater interface to insert phi nodes as required. SmallVector<PHINode*, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, *CurAST, DL, Alignment); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. LoadInst *PreheaderLoad = new LoadInst(SomePtr, SomePtr->getName()+".promoted", Preheader->getTerminator()); PreheaderLoad->setAlignment(Alignment); PreheaderLoad->setDebugLoc(DL); SSA.AddAvailableValue(Preheader, PreheaderLoad); // Rewrite all the loads in the loop and remember all the definitions from // stores in the loop. Promoter.run(LoopUses); // If the SSAUpdater didn't use the load in the preheader, just zap it now. if (PreheaderLoad->use_empty()) PreheaderLoad->eraseFromParent(); }
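// -------------------------------------------------------------------------
// Illustrative sketch (standard C++, not LLVM API) of the effect of
// PromoteAliasSet: when every access to a loop-invariant pointer is a simple
// direct load or store and at least one store is guaranteed to execute, the
// memory location is promoted to a scalar, with a single load hoisted to the
// preheader and a single store sunk to the loop exits. Function names are
// illustrative only.
#include <cstdio>

// Before promotion: a load and a store through p on every iteration.
static void countUnpromoted(int *p, int n) {
  for (int i = 0; i < n; ++i)
    *p += 1;
}

// After promotion: one PreheaderLoad-style load, loop uses rewritten to the
// scalar, and the store sunk to the exit. This is only legal because the
// store executes unconditionally; the "if (c) *P += 1" case must stay as-is.
static void countPromoted(int *p, int n) {
  int promoted = *p;         // load in the preheader
  for (int i = 0; i < n; ++i)
    promoted += 1;           // loop body touches only the scalar
  *p = promoted;             // store in the loop-exit block
}

int main() {
  int a = 0, b = 0;
  countUnpromoted(&a, 10);
  countPromoted(&b, 10);
  std::printf("%d %d\n", a, b);  // both print 10
  return 0;
}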
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg) { assert(AI); AtomicOrdering MemOpOrder = AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering(); Value *Addr = AI->getPointerOperand(); BasicBlock *BB = AI->getParent(); Function *F = BB->getParent(); LLVMContext &Ctx = F->getContext(); // Given: atomicrmw some_op iN* %addr, iN %incr ordering // // The standard expansion we produce is: // [...] // %init_loaded = load atomic iN* %addr // br label %loop // loop: // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] // %new = some_op iN %loaded, %incr // %pair = cmpxchg iN* %addr, iN %loaded, iN %new // %new_loaded = extractvalue { iN, i1 } %pair, 0 // %success = extractvalue { iN, i1 } %pair, 1 // br i1 %success, label %atomicrmw.end, label %loop // atomicrmw.end: // [...] BasicBlock *ExitBB = BB->splitBasicBlock(AI->getIterator(), "atomicrmw.end"); BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); // This grabs the DebugLoc from AI. IRBuilder<> Builder(AI); // The split call above "helpfully" added a branch at the end of BB (to the // wrong place), but we want a load. It's easiest to just remove // the branch entirely. std::prev(BB->end())->eraseFromParent(); Builder.SetInsertPoint(BB); LoadInst *InitLoaded = Builder.CreateLoad(Addr); // Atomics require at least natural alignment. InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8); Builder.CreateBr(LoopBB); // Start the main loop block now that we've taken care of the preliminaries. Builder.SetInsertPoint(LoopBB); PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded"); Loaded->addIncoming(InitLoaded, BB); Value *NewVal = performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); Value *NewLoaded = nullptr; Value *Success = nullptr; CreateCmpXchg(Builder, Addr, Loaded, NewVal, MemOpOrder, Success, NewLoaded); assert(Success && NewLoaded); Loaded->addIncoming(NewLoaded, LoopBB); Builder.CreateCondBr(Success, ExitBB, LoopBB); Builder.SetInsertPoint(ExitBB, ExitBB->begin()); AI->replaceAllUsesWith(NewLoaded); AI->eraseFromParent(); return true; }
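// -------------------------------------------------------------------------
// Illustrative sketch of the expansion above using std::atomic instead of IR:
// an atomicrmw-style fetch-add is emulated with an initial load followed by a
// compare-exchange loop, matching the %init_loaded / %loaded / %new / %pair /
// %success structure the pass emits. The memory orderings chosen here are an
// assumption for the example, not taken from the pass.
#include <atomic>
#include <cstdio>

static int atomicRMWAdd(std::atomic<int> &addr, int incr) {
  int loaded = addr.load(std::memory_order_relaxed);   // %init_loaded
  int newVal;
  do {
    newVal = loaded + incr;                            // %new = add %loaded, %incr
    // On failure, compare_exchange_weak reloads the observed value into
    // 'loaded', which plays the role of the %new_loaded -> %loaded back-edge.
  } while (!addr.compare_exchange_weak(loaded, newVal,
                                       std::memory_order_seq_cst,
                                       std::memory_order_relaxed));
  return loaded;                                       // old value, like atomicrmw
}

int main() {
  std::atomic<int> x{5};
  int old = atomicRMWAdd(x, 3);
  std::printf("%d %d\n", old, x.load());               // prints "5 8"
  return 0;
}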
Value *BoUpSLP::vectorizeTree(ValueList &VL, int VF) { Type *ScalarTy = VL[0]->getType(); if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) ScalarTy = SI->getValueOperand()->getType(); VectorType *VecTy = VectorType::get(ScalarTy, VF); // Check if all of the operands are constants or identical. bool AllConst = true; bool AllSameScalar = true; for (unsigned i = 0, e = VF; i < e; ++i) { AllConst &= !!dyn_cast<Constant>(VL[i]); AllSameScalar &= (VL[0] == VL[i]); // Must have a single use. Instruction *I = dyn_cast<Instruction>(VL[i]); if (I && (I->getNumUses() > 1 || I->getParent() != BB)) return Scalarize(VL, VecTy); } // Is this a simple vector constant. if (AllConst || AllSameScalar) return Scalarize(VL, VecTy); // Scalarize unknown structures. Instruction *VL0 = dyn_cast<Instruction>(VL[0]); if (!VL0) return Scalarize(VL, VecTy); unsigned Opcode = VL0->getOpcode(); for (unsigned i = 0, e = VF; i < e; ++i) { Instruction *I = dyn_cast<Instruction>(VL[i]); // If not all of the instructions are identical then we have to scalarize. if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy); } switch (Opcode) { case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: case Instruction::UDiv: case Instruction::SDiv: case Instruction::FDiv: case Instruction::URem: case Instruction::SRem: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: case Instruction::And: case Instruction::Or: case Instruction::Xor: { ValueList LHSVL, RHSVL; for (int i = 0; i < VF; ++i) { RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0)); LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1)); } Value *RHS = vectorizeTree(RHSVL, VF); Value *LHS = vectorizeTree(LHSVL, VF); IRBuilder<> Builder(GetLastInstr(VL, VF)); BinaryOperator *BinOp = dyn_cast<BinaryOperator>(VL0); return Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS); } case Instruction::Load: { LoadInst *LI = dyn_cast<LoadInst>(VL0); unsigned Alignment = LI->getAlignment(); // Check if all of the loads are consecutive. for (unsigned i = 1, e = VF; i < e; ++i) if (!isConsecutiveAccess(VL[i-1], VL[i])) return Scalarize(VL, VecTy); IRBuilder<> Builder(GetLastInstr(VL, VF)); Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(), VecTy->getPointerTo()); LI = Builder.CreateLoad(VecPtr); LI->setAlignment(Alignment); return LI; } case Instruction::Store: { StoreInst *SI = dyn_cast<StoreInst>(VL0); unsigned Alignment = SI->getAlignment(); ValueList ValueOp; for (int i = 0; i < VF; ++i) ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand()); Value *VecValue = vectorizeTree(ValueOp, VF); IRBuilder<> Builder(GetLastInstr(VL, VF)); Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(), VecTy->getPointerTo()); Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment); for (int i = 0; i < VF; ++i) cast<Instruction>(VL[i])->eraseFromParent(); return 0; } default: return Scalarize(VL, VecTy); } }
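// -------------------------------------------------------------------------
// Illustrative sketch of the bundle transformation above: a group of VF
// isomorphic scalar operations over consecutive memory becomes one wide load
// per operand, one vector arithmetic instruction, and one wide store. Plain
// C++ arrays stand in for the <4 x float> values; VF = 4 is assumed.
#include <array>
#include <cstdio>

// Scalar bundle of the kind BoUpSLP::vectorizeTree recognizes.
static void add4Scalar(float *a, const float *b, const float *c) {
  a[0] = b[0] + c[0];
  a[1] = b[1] + c[1];
  a[2] = b[2] + c[2];
  a[3] = b[3] + c[3];
}

// Vectorized form: consecutive loads merged into wide loads (the bitcast to a
// vector pointer in the pass), a single add, and one wide store.
static void add4Vector(float *a, const float *b, const float *c) {
  std::array<float, 4> vb, vc, va;
  for (int i = 0; i < 4; ++i) { vb[i] = b[i]; vc[i] = c[i]; }  // wide loads
  for (int i = 0; i < 4; ++i) va[i] = vb[i] + vc[i];           // vector add
  for (int i = 0; i < 4; ++i) a[i] = va[i];                    // wide store
}

int main() {
  float b[4] = {1, 2, 3, 4}, c[4] = {10, 20, 30, 40}, a1[4], a2[4];
  add4Scalar(a1, b, c);
  add4Vector(a2, b, c);
  std::printf("%g %g\n", a1[3], a2[3]);  // both print 44
  return 0;
}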
void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, bool UseCalls) { // Don't insert coverage for unreachable blocks: we will never call // __sanitizer_cov() for them, so counting them in // NumberOfInstrumentedBlocks() might complicate calculation of code coverage // percentage. Also, unreachable instructions frequently have no debug // locations. if (isa<UnreachableInst>(BB.getTerminator())) return; BasicBlock::iterator IP = BB.getFirstInsertionPt(), BE = BB.end(); // Skip static allocas at the top of the entry block so they don't become // dynamic when we split the block. If we used our optimized stack layout, // then there will only be one alloca and it will come first. for (; IP != BE; ++IP) { AllocaInst *AI = dyn_cast<AllocaInst>(IP); if (!AI || !AI->isStaticAlloca()) break; } bool IsEntryBB = &BB == &F.getEntryBlock(); DebugLoc EntryLoc; if (IsEntryBB) { if (auto SP = getDISubprogram(&F)) EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP); } else { EntryLoc = IP->getDebugLoc(); } IRBuilder<> IRB(IP); IRB.SetCurrentDebugLocation(EntryLoc); SmallVector<Value *, 1> Indices; Value *GuardP = IRB.CreateAdd( IRB.CreatePointerCast(GuardArray, IntptrTy), ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4)); Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy); if (UseCalls) { IRB.CreateCall(SanCovWithCheckFunction, GuardP); } else { LoadInst *Load = IRB.CreateLoad(GuardP); Load->setAtomic(Monotonic); Load->setAlignment(4); SetNoSanitizeMetadata(Load); Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load); Instruction *Ins = SplitBlockAndInsertIfThen( Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); IRB.SetInsertPoint(Ins); IRB.SetCurrentDebugLocation(EntryLoc); // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC. IRB.CreateCall(SanCovFunction, GuardP); IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. } if (Options.Use8bitCounters) { IRB.SetInsertPoint(IP); Value *P = IRB.CreateAdd( IRB.CreatePointerCast(EightBitCounterArray, IntptrTy), ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1)); P = IRB.CreateIntToPtr(P, IRB.getInt8PtrTy()); LoadInst *LI = IRB.CreateLoad(P); Value *Inc = IRB.CreateAdd(LI, ConstantInt::get(IRB.getInt8Ty(), 1)); StoreInst *SI = IRB.CreateStore(Inc, P); SetNoSanitizeMetadata(LI); SetNoSanitizeMetadata(SI); } if (Options.TraceBB) { // Experimental support for tracing. // Insert a callback with the same guard variable as used for coverage. IRB.SetInsertPoint(IP); IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP); } }
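// -------------------------------------------------------------------------
// Illustrative sketch of the Use8bitCounters path above: every instrumented
// block owns one byte in a counter array and increments it with a plain
// load/add/store (the real pass additionally tags those accesses with
// no-sanitize metadata, which has no analogue in source). The array size and
// indexing scheme here are assumptions for the example.
#include <cstdint>
#include <cstdio>

static uint8_t eightBitCounters[4];            // one i8 counter per block

static inline void countBlock(int blockIndex) {
  uint8_t c = eightBitCounters[blockIndex];    // the LoadInst *LI
  eightBitCounters[blockIndex] = c + 1;        // i8 add, wraps at 256
}

int main() {
  for (int i = 0; i < 300; ++i)
    countBlock(2);
  std::printf("%u\n", (unsigned)eightBitCounters[2]);  // 300 mod 256 == 44
  return 0;
}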
void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, bool UseCalls) { // Don't insert coverage for unreachable blocks: we will never call // __sanitizer_cov() for them, so counting them in // NumberOfInstrumentedBlocks() might complicate calculation of code coverage // percentage. Also, unreachable instructions frequently have no debug // locations. if (isa<UnreachableInst>(BB.getTerminator())) return; BasicBlock::iterator IP = BB.getFirstInsertionPt(); bool IsEntryBB = &BB == &F.getEntryBlock(); DebugLoc EntryLoc; if (IsEntryBB) { if (auto SP = getDISubprogram(&F)) EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP); // Keep static allocas and llvm.localescape calls in the entry block. Even // if we aren't splitting the block, it's nice for allocas to be before // calls. IP = PrepareToSplitEntryBlock(BB, IP); } else { EntryLoc = IP->getDebugLoc(); } IRBuilder<> IRB(&*IP); IRB.SetCurrentDebugLocation(EntryLoc); Value *GuardP = IRB.CreateAdd( IRB.CreatePointerCast(GuardArray, IntptrTy), ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4)); Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy); if (Options.TracePC) { IRB.CreateCall(SanCovTracePC); } else if (Options.TraceBB) { IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP); } else if (UseCalls) { IRB.CreateCall(SanCovWithCheckFunction, GuardP); } else { LoadInst *Load = IRB.CreateLoad(GuardP); Load->setAtomic(Monotonic); Load->setAlignment(4); SetNoSanitizeMetadata(Load); Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load); Instruction *Ins = SplitBlockAndInsertIfThen( Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); IRB.SetInsertPoint(Ins); IRB.SetCurrentDebugLocation(EntryLoc); // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC. IRB.CreateCall(SanCovFunction, GuardP); IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. } if (Options.Use8bitCounters) { IRB.SetInsertPoint(&*IP); Value *P = IRB.CreateAdd( IRB.CreatePointerCast(EightBitCounterArray, IntptrTy), ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1)); P = IRB.CreateIntToPtr(P, IRB.getInt8PtrTy()); LoadInst *LI = IRB.CreateLoad(P); Value *Inc = IRB.CreateAdd(LI, ConstantInt::get(IRB.getInt8Ty(), 1)); StoreInst *SI = IRB.CreateStore(Inc, P); SetNoSanitizeMetadata(LI); SetNoSanitizeMetadata(SI); } }
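// -------------------------------------------------------------------------
// Illustrative sketch of the guard check emitted on the non-UseCalls path:
// each instrumented block gets a 4-byte slot in the guard array, the block
// performs a monotonic atomic load of its guard, and only when "0 >= guard"
// does it take the cold branch into the coverage callback. The stand-in
// runtime below simply marks the guard positive so later executions skip the
// callback; the real __sanitizer_cov behavior is assumed, not reproduced.
#include <atomic>
#include <cstdio>

static std::atomic<int> guardArray[4];         // one i32 guard per block

static void sanitizerCovStandIn(std::atomic<int> *guard) {
  // Stand-in: record the block once and flip the guard to a positive value.
  std::printf("covered block %ld\n", (long)(guard - guardArray));
  guard->store(1, std::memory_order_relaxed);
}

static inline void coverageHook(int blockIndex) {
  std::atomic<int> *guard = &guardArray[blockIndex];
  // Mirrors: %load = load atomic i32, monotonic; %cmp = icmp sge 0, %load.
  if (0 >= guard->load(std::memory_order_relaxed))
    sanitizerCovStandIn(guard);                // cold path, taken once per block
}

int main() {
  coverageHook(0);
  coverageHook(0);                             // second call stays on the fast path
  return 0;
}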