SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallSite(CallSite CS) {
  const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc,
                                               TLI);
  if (!FnData)
    return unknown();

  // handle strdup-like functions separately
  if (FnData->AllocTy == StrDupLike) {
    // TODO
    return unknown();
  }

  Value *FirstArg = CS.getArgument(FnData->FstParam);
  FirstArg = Builder.CreateZExt(FirstArg, IntTy);
  if (FnData->SndParam < 0)
    return std::make_pair(FirstArg, Zero);

  Value *SecondArg = CS.getArgument(FnData->SndParam);
  SecondArg = Builder.CreateZExt(SecondArg, IntTy);
  Value *Size = Builder.CreateMul(FirstArg, SecondArg);
  return std::make_pair(Size, Zero);

  // TODO: handle more standard functions (+ wchar cousins):
  // - strdup / strndup
  // - strcpy / strncpy
  // - strcat / strncat
  // - memcpy / memmove
  // - memset
}
SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) {
  Optional<AllocFnsTy> FnData = getAllocationSize(CS.getInstruction(), TLI);
  if (!FnData)
    return unknown();

  // Handle strdup-like functions separately.
  if (FnData->AllocTy == StrDupLike) {
    APInt Size(IntTyBits, GetStringLength(CS.getArgument(0)));
    if (!Size)
      return unknown();

    // Strndup limits strlen.
    if (FnData->FstParam > 0) {
      ConstantInt *Arg =
          dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam));
      if (!Arg)
        return unknown();

      APInt MaxSize = Arg->getValue().zextOrSelf(IntTyBits);
      if (Size.ugt(MaxSize))
        Size = MaxSize + 1;
    }
    return std::make_pair(Size, Zero);
  }

  ConstantInt *Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam));
  if (!Arg)
    return unknown();

  APInt Size = Arg->getValue();
  if (!CheckedZextOrTrunc(Size))
    return unknown();

  // Size is determined by just 1 parameter.
  if (FnData->SndParam < 0)
    return std::make_pair(Size, Zero);

  Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->SndParam));
  if (!Arg)
    return unknown();

  APInt NumElems = Arg->getValue();
  if (!CheckedZextOrTrunc(NumElems))
    return unknown();

  bool Overflow;
  Size = Size.umul_ov(NumElems, Overflow);
  return Overflow ? unknown() : std::make_pair(Size, Zero);

  // TODO: handle more standard functions (+ wchar cousins):
  // - strdup / strndup
  // - strcpy / strncpy
  // - strcat / strncat
  // - memcpy / memmove
  // - memset
}
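// The snippet above calls CheckedZextOrTrunc() without showing its
// definition; a later variant of this visitor (further down in this
// collection) defines the same logic as a local lambda. A minimal sketch of
// what such a helper presumably looks like, written here as a hypothetical
// free-standing function that takes the target bit width explicitly
// (illustration only; the real code captures IntTyBits instead):
static bool CheckedZextOrTrunc(APInt &I, unsigned IntTyBits) {
  // More bits than we can handle: the value cannot be represented in the
  // width used for size/offset arithmetic.
  if (I.getBitWidth() > IntTyBits && I.getActiveBits() > IntTyBits)
    return false;
  // Resize to the size/offset width.
  if (I.getBitWidth() != IntTyBits)
    I = I.zextOrTrunc(IntTyBits);
  return true;
}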
CallSite GNUstep::IMPCacher::SplitSend(CallSite msgSend) {
  BasicBlock *lookupBB = msgSend->getParent();
  Function *F = lookupBB->getParent();
  Module *M = F->getParent();
  Function *send = M->getFunction("objc_msgSend");
  Function *send_stret = M->getFunction("objc_msgSend_stret");
  Function *send_fpret = M->getFunction("objc_msgSend_fpret");
  Value *self;
  Value *cmd;
  int selfIndex = 0;

  if ((msgSend.getCalledFunction() == send) ||
      (msgSend.getCalledFunction() == send_fpret)) {
    self = msgSend.getArgument(0);
    cmd = msgSend.getArgument(1);
  } else if (msgSend.getCalledFunction() == send_stret) {
    selfIndex = 1;
    self = msgSend.getArgument(1);
    cmd = msgSend.getArgument(2);
  } else {
    abort();
    return CallSite();
  }

  CGBuilder B(&F->getEntryBlock(), F->getEntryBlock().begin());
  Value *selfPtr = B.CreateAlloca(self->getType());
  B.SetInsertPoint(msgSend.getInstruction());
  B.CreateStore(self, selfPtr, true);

  LLVMType *impTy = msgSend.getCalledValue()->getType();
  LLVMType *slotTy = PointerType::getUnqual(
      StructType::get(PtrTy, PtrTy, PtrTy, IntTy, impTy, PtrTy, NULL));
  Value *slot;
  Constant *lookupFn = M->getOrInsertFunction("objc_msg_lookup_sender",
      slotTy, selfPtr->getType(), cmd->getType(), PtrTy, NULL);

  if (msgSend.isCall()) {
    slot = B.CreateCall3(lookupFn, selfPtr, cmd, Constant::getNullValue(PtrTy));
  } else {
    InvokeInst *inv = cast<InvokeInst>(msgSend.getInstruction());
    BasicBlock *callBB = SplitBlock(lookupBB, msgSend.getInstruction(), Owner);
    removeTerminator(lookupBB);
    B.SetInsertPoint(lookupBB);
    slot = B.CreateInvoke3(lookupFn, callBB, inv->getUnwindDest(), selfPtr,
                           cmd, Constant::getNullValue(PtrTy));
    addPredecssor(inv->getUnwindDest(), msgSend->getParent(), lookupBB);
    B.SetInsertPoint(msgSend.getInstruction());
  }

  Value *imp = B.CreateLoad(B.CreateStructGEP(slot, 4));
  msgSend.setArgument(selfIndex, B.CreateLoad(selfPtr, true));
  msgSend.setCalledFunction(imp);
  return CallSite(slot);
}
SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) {
  const AllocFnsTy *FnData = getAllocationData(CS.getInstruction(), AnyAlloc,
                                               TLI);
  if (!FnData)
    return unknown();

  // handle strdup-like functions separately
  if (FnData->AllocTy == StrDupLike) {
    APInt Size(IntTyBits, GetStringLength(CS.getArgument(0)));
    if (!Size)
      return unknown();

    // strndup limits strlen
    if (FnData->FstParam > 0) {
      ConstantInt *Arg =
          dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam));
      if (!Arg)
        return unknown();

      APInt MaxSize = Arg->getValue().zextOrSelf(IntTyBits);
      if (Size.ugt(MaxSize))
        Size = MaxSize + 1;
    }
    return std::make_pair(Size, Zero);
  }

  ConstantInt *Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam));
  if (!Arg)
    return unknown();

  APInt Size = Arg->getValue().zextOrSelf(IntTyBits);

  // size determined by just 1 parameter
  if (FnData->SndParam < 0)
    return std::make_pair(Size, Zero);

  Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->SndParam));
  if (!Arg)
    return unknown();

  Size *= Arg->getValue().zextOrSelf(IntTyBits);
  return std::make_pair(Size, Zero);

  // TODO: handle more standard functions (+ wchar cousins):
  // - strdup / strndup
  // - strcpy / strncpy
  // - strcat / strncat
  // - memcpy / memmove
  // - memset
}
// CallSite is light-weight, and passed by value.
void Preparer::fillInAllocationSize(CallSite CS) {
  // HookMemAlloc(ValueID, Base, Size = undef)
  Value *Base = CS.getArgument(1);
  while (BitCastInst *BCI = dyn_cast<BitCastInst>(Base)) {
    Base = BCI->getOperand(0);
  }

  if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
    TargetData &TD = getAnalysis<TargetData>();
    Value *Size = ConstantInt::get(
        TD.getIntPtrType(AI->getContext()),
        TD.getTypeStoreSize(AI->getAllocatedType()));
    if (AI->isArrayAllocation()) {
      // e.g. %32 = alloca i8, i64 %conv164
      Size = BinaryOperator::Create(Instruction::Mul, Size,
                                    AI->getArraySize(), "", AI);
    }
    CS.setArgument(2, Size);
  } else if (DynAAUtils::IsMallocCall(Base)) {
    CallSite MallocCS(Base);
    assert(MallocCS);
    Function *Malloc = MallocCS.getCalledFunction();
    assert(Malloc);
    StringRef MallocName = Malloc->getName();
    assert(MallocName == "malloc" || MallocName == "valloc");
    CS.setArgument(2, MallocCS.getArgument(0));
  } else {
    // For now, MemoryInstrumenter will only use undef for the allocation size
    // for AllocaInsts, malloc, and valloc.
    assert(false);
  }
}
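// For reference, the instrumented call this routine patches up is assumed to
// have the shape sketched below (hypothetical declaration; the real hook is
// whatever MemoryInstrumenter emitted, as the comment above only shows its
// argument layout):
//
//   void HookMemAlloc(unsigned ValueID, void *Base, unsigned long Size);
//
// fillInAllocationSize() only touches argument 2 (Size), replacing the undef
// placeholder with a value computed from the alloca's type or taken from the
// original malloc()/valloc() size argument.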
bool InlineMalloc::runOnFunction(Function &F) {
  Function *Malloc = F.getParent()->getFunction("gcmalloc");
  if (!Malloc || Malloc->isDeclaration())
    return false;

  bool Changed = false;
  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; BI++) {
    BasicBlock *Cur = BI;
    for (BasicBlock::iterator II = Cur->begin(), IE = Cur->end(); II != IE;) {
      Instruction *I = II;
      II++;
      CallSite Call = CallSite::get(I);
      Instruction *CI = Call.getInstruction();
      if (CI) {
        Function *Temp = Call.getCalledFunction();
        if (Temp == Malloc) {
          if (isa<Constant>(Call.getArgument(0))) {
            InlineFunctionInfo IFI(NULL, mvm::MvmModule::TheTargetData);
            Changed |= InlineFunction(Call, IFI);
            // Inlining invalidates the iterators into this block, so stop
            // scanning it and move on to the next basic block.
            break;
          }
        }
      }
    }
  }
  return Changed;
}
//
// Method: visitRuntimeCheck()
//
// Description:
//  Visit a call to a run-time check (or related function) and insert pool
//  arguments where needed. PoolArgc is the number of initial pool arguments
//  that should be filled at the call site with pool handles for the
//  corresponding pointer arguments.
//
void FuncTransform::visitRuntimeCheck (CallSite CS, const unsigned PoolArgc) {
  // A call to the runtime check should have positions for each pool argument
  // and the corresponding pointer.
  assert ((CS.arg_size() >= 2 * PoolArgc) &&
          "Not enough arguments to call of a runtime check!");

  for (unsigned PoolIndex = 0; PoolIndex < PoolArgc; ++PoolIndex) {
    //
    // Get the pool handle for the pointer argument.
    //
    Value *PH =
      getPoolHandle(CS.getArgument(PoolArgc + PoolIndex)->stripPointerCasts());

    //
    // Insert the pool handle into the run-time check.
    //
    if (PH) {
      Type *Int8Type = Type::getInt8Ty(CS.getInstruction()->getContext());
      Type *VoidPtrTy = PointerType::getUnqual(Int8Type);
      PH = castTo (PH, VoidPtrTy, PH->getName(), CS.getInstruction());
      CS.setArgument (PoolIndex, PH);

      //
      // Record that we've used the pool here.
      //
      AddPoolUse (*(CS.getInstruction()), PH, PoolUses);
    }
  }
}
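// To make the argument layout concrete: for PoolArgc == 2, the check call is
// assumed to look like the following (hypothetical prototype and argument
// names; the actual runtime checks are whatever
// getNumInitialPoolArguments() reported a pool count for):
//
//   runtimecheck(PH_a, PH_b, ptr_a, ptr_b, ...);
//
// That is, argument i (0 <= i < PoolArgc) receives the pool handle computed
// from pointer argument PoolArgc + i, which is exactly the indexing the loop
// above performs via CS.getArgument() and CS.setArgument().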
void FuncTransform::visitReallocCall(CallSite CS) {
  assert(CS.arg_end()-CS.arg_begin() == 2 && "realloc takes two arguments!");
  Instruction *I = CS.getInstruction();
  Value *PH = getPoolHandle(I);
  Value *OldPtr = CS.getArgument(0);
  Value *Size = CS.getArgument(1);

  // Don't poolallocate if we have no pool handle
  if (PH == 0 || isa<ConstantPointerNull>(PH))
    return;

  if (Size->getType() != Type::getInt32Ty(CS.getInstruction()->getContext()))
    Size = CastInst::CreateIntegerCast(
        Size, Type::getInt32Ty(CS.getInstruction()->getContext()), false,
        Size->getName(), I);

  static Type *VoidPtrTy =
    PointerType::getUnqual(Type::getInt8Ty(CS.getInstruction()->getContext()));
  if (OldPtr->getType() != VoidPtrTy)
    OldPtr = CastInst::CreatePointerCast(OldPtr, VoidPtrTy,
                                         OldPtr->getName(), I);

  std::string Name = I->getName();
  I->setName("");
  Value *Opts[3] = {PH, OldPtr, Size};
  Instruction *V = CallInst::Create(PAInfo.PoolRealloc, Opts, Name, I);
  Instruction *Casted = V;
  if (V->getType() != I->getType())
    Casted = CastInst::CreatePointerCast(V, I->getType(), V->getName(), I);

  // Update def-use info
  I->replaceAllUsesWith(Casted);

  // If we are modifying the original function, update the DSGraph.
  if (!FI.Clone) {
    // V and Casted now point to whatever the original allocation did.
    G->getScalarMap().replaceScalar(I, V);
    if (V != Casted)
      G->getScalarMap()[Casted] = G->getScalarMap()[V];
  } else {
    // Otherwise, update the NewToOldValueMap
    UpdateNewToOldValueMap(I, V, V != Casted ? Casted : 0);
  }

  // If this was an invoke, fix up the CFG.
  if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
    BranchInst::Create (II->getNormalDest(), I);
    II->getUnwindDest()->removePredecessor(II->getParent(), true);
  }

  // Remove old allocation instruction.
  I->eraseFromParent();
}
/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeMemoryInst to sink their address computing into the block when
/// possible / profitable.
bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
                                           DenseMap<Value*,Value*> &SunkAddrs) {
  bool MadeChange = false;
  InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());

  // Do a prepass over the constraints, canonicalizing them, and building up
  // the ConstraintOperands list.
  std::vector<InlineAsm::ConstraintInfo>
    ConstraintInfos = IA->ParseConstraints();

  /// ConstraintOperands - Information about all of the constraints.
  std::vector<TargetLowering::AsmOperandInfo> ConstraintOperands;
  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
    ConstraintOperands.
      push_back(TargetLowering::AsmOperandInfo(ConstraintInfos[i]));
    TargetLowering::AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      if (OpInfo.isIndirect)
        OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    // Compute the constraint code and ConstraintType to use.
    TLI->ComputeConstraintToUse(OpInfo, SDValue(),
                             OpInfo.ConstraintType == TargetLowering::C_Memory);

    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
        OpInfo.isIndirect) {
      Value *OpVal = OpInfo.CallOperandVal;
      MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs);
    }
  }

  return MadeChange;
}
/// visitStrdupCall - Handle strdup().
///
void FuncTransform::visitStrdupCall(CallSite CS) {
  assert(CS.arg_end()-CS.arg_begin() == 1 && "strdup takes one argument!");
  Instruction *I = CS.getInstruction();
  assert (getDSNodeHFor(I).getNode() && "strdup has NULL DSNode!\n");
  Value *PH = getPoolHandle(I);
  Type *Int8Type = Type::getInt8Ty(CS.getInstruction()->getContext());

#if 0
  assert (PH && "PH for strdup is null!\n");
#else
  if (!PH) {
    errs() << "strdup: NoPH\n";
    return;
  }
#endif
  Value *OldPtr = CS.getArgument(0);

  static Type *VoidPtrTy = PointerType::getUnqual(Int8Type);
  if (OldPtr->getType() != VoidPtrTy)
    OldPtr = CastInst::CreatePointerCast(OldPtr, VoidPtrTy,
                                         OldPtr->getName(), I);

  std::string Name = I->getName();
  I->setName("");
  Value *Opts[3] = {PH, OldPtr, 0};
  Instruction *V = CallInst::Create(PAInfo.PoolStrdup, Opts, Name, I);
  Instruction *Casted = V;
  if (V->getType() != I->getType())
    Casted = CastInst::CreatePointerCast(V, I->getType(), V->getName(), I);

  // Update def-use info
  I->replaceAllUsesWith(Casted);

  // If we are modifying the original function, update the DSGraph.
  if (!FI.Clone) {
    // V and Casted now point to whatever the original allocation did.
    G->getScalarMap().replaceScalar(I, V);
    if (V != Casted)
      G->getScalarMap()[Casted] = G->getScalarMap()[V];
  } else {
    // Otherwise, update the NewToOldValueMap
    UpdateNewToOldValueMap(I, V, V != Casted ? Casted : 0);
  }

  // If this was an invoke, fix up the CFG.
  if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
    BranchInst::Create (II->getNormalDest(), I);
    II->getUnwindDest()->removePredecessor(II->getParent(), true);
  }

  // Remove old allocation instruction.
  I->eraseFromParent();
}
void Preparer::expandMalloc(CallSite CS) {
  Function *Callee = CS.getCalledFunction();
  assert(Callee);
  StringRef CalleeName = Callee->getName();
  if (CalleeName == "malloc" || CalleeName == "valloc") {
    Value *Size = CS.getArgument(0);
    Value *ExpandedSize = BinaryOperator::Create(
        Instruction::Add, Size,
        ConstantInt::get(cast<IntegerType>(Size->getType()), 1),
        "expanded.size", CS.getInstruction());
    CS.setArgument(0, ExpandedSize);
  }
}
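// A minimal sketch of a driver loop for expandMalloc(), assuming the
// surrounding pass walks every instruction and hands each direct call site
// to the routine above (hypothetical wrapper for illustration only; the real
// pass's iteration scheme is not shown in this snippet):
void Preparer::expandMallocs(Function &F) {
  for (Function::iterator BB = F.begin(); BB != F.end(); ++BB) {
    for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
      CallSite CS(&*I);  // Null call site if I is not a call or invoke.
      if (!CS || !CS.getCalledFunction())
        continue;
      // expandMalloc() itself filters for malloc/valloc by name.
      expandMalloc(CS);
    }
  }
}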
/// AllCalleesPassInValidPointerForArgument - Return true if we can prove that
/// all callees pass in a valid pointer for the specified function argument.
static bool AllCalleesPassInValidPointerForArgument(Argument *Arg) {
  Function *Callee = Arg->getParent();

  unsigned ArgNo = std::distance(Callee->arg_begin(),
                                 Function::arg_iterator(Arg));

  // Look at all call sites of the function.  At this point we know we only
  // have direct callees.
  for (Value::use_iterator UI = Callee->use_begin(), E = Callee->use_end();
       UI != E; ++UI) {
    CallSite CS = CallSite::get(*UI);
    assert(CS.getInstruction() && "Should only have direct calls!");

    if (!IsAlwaysValidPointer(CS.getArgument(ArgNo)))
      return false;
  }
  return true;
}
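// IsAlwaysValidPointer() is used above but not shown in this snippet. A
// conservative sketch of the kind of predicate it presumably is: a pointer is
// "always valid" when it demonstrably refers to real storage, e.g. an alloca
// or a global variable, possibly behind pointer casts. (Hypothetical
// reconstruction for illustration; the real helper may accept more cases.)
static bool IsAlwaysValidPointer(Value *V) {
  V = V->stripPointerCasts();
  return isa<AllocaInst>(V) || isa<GlobalVariable>(V);
}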
/// getStoredPointerOperand - Return the pointer that is being written to.
static Value *getStoredPointerOperand(Instruction *I) {
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->getPointerOperand();
  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
    return MI->getDest();

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default: llvm_unreachable("Unexpected intrinsic!");
    case Intrinsic::init_trampoline:
      return II->getArgOperand(0);
    }
  }

  CallSite CS(I);
  // All the supported functions so far happen to have dest as their first
  // argument.
  return CS.getArgument(0);
}
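// Usage sketch: this helper lets a dead-store style analysis treat stores,
// memory intrinsics, and known library calls uniformly. A hypothetical
// caller (illustration only), assuming it has already been established that
// both instructions write memory:
static bool writesToSamePointer(Instruction *A, Instruction *B) {
  // Compare the written-to pointers modulo pointer casts.
  return getStoredPointerOperand(A)->stripPointerCasts() ==
         getStoredPointerOperand(B)->stripPointerCasts();
}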
/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeMemoryInst to sink their address computing into the block when
/// possible / profitable.
bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
                                           DenseMap<Value*,Value*> &SunkAddrs) {
  bool MadeChange = false;

  std::vector<TargetLowering::AsmOperandInfo> TargetConstraints =
    TLI->ParseConstraints(CS);
  unsigned ArgNo = 0;
  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];

    // Compute the constraint code and ConstraintType to use.
    TLI->ComputeConstraintToUse(OpInfo, SDValue());

    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
        OpInfo.isIndirect) {
      Value *OpVal = const_cast<Value *>(CS.getArgument(ArgNo++));
      MadeChange |= OptimizeMemoryInst(I, OpVal, OpVal->getType(), SunkAddrs);
    } else if (OpInfo.Type == InlineAsm::isInput)
      ArgNo++;
  }

  return MadeChange;
}
void FuncTransform::visitFreeCall (CallSite &CS) {
  //
  // Replace the call to the free() function with a call to poolfree().
  //
  Instruction *InsertPt = CS.getInstruction();
  if (Instruction *I = InsertPoolFreeInstr (CS.getArgument(0), InsertPt)) {
    // Delete the now obsolete free instruction...
    // FIXME: use "eraseFromParent"? (Note this might require a refactoring)
    InsertPt->getParent()->getInstList().erase(InsertPt);

    // Update the NewToOldValueMap if this is a clone
    // FIXME: Use of utility function UpdateNewToOldValueMap
    if (!FI.NewToOldValueMap.empty()) {
      std::map<Value*,const Value*>::iterator II =
        FI.NewToOldValueMap.find(InsertPt);
      assert(II != FI.NewToOldValueMap.end() &&
             "free call not found in clone?");
      FI.NewToOldValueMap.insert(std::make_pair(I, II->second));
      FI.NewToOldValueMap.erase(II);
    }
  }
}
void FuncTransform::visitMallocCall(CallSite &CS) {
  //
  // Get the instruction to which the call site refers
  //
  Instruction *MI = CS.getInstruction();

  //
  // Get the pool handle for the node that this contributes to...
  //
  // FIXME: This check may be redundant
  Value *PH = getPoolHandle(MI);
  if (PH == 0 || isa<ConstantPointerNull>(PH))
    return;

  //
  // Find the size of the allocation.
  //
  Value *AllocSize = CS.getArgument(0);

  //
  // Transform the allocation site to use poolalloc().
  //
  TransformAllocationInstr(MI, AllocSize);
}
int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
  // Get information about the callee.
  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];

  // If we haven't calculated this information yet, do so now.
  if (CalleeFI->Metrics.NumBlocks == 0)
    CalleeFI->analyzeFunction(Callee, TD);

  // InlineCost - This value measures how good of an inline candidate this
  // call site is to inline.  A lower inline cost makes it more likely for
  // the call to be inlined.  This value may go negative.
  //
  int InlineCost = 0;

  // Compute any size reductions we can expect due to arguments being passed
  // into the function.
  //
  unsigned ArgNo = 0;
  CallSite::arg_iterator I = CS.arg_begin();
  for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
       FI != FE; ++I, ++FI, ++ArgNo) {
    // If an alloca is passed in, inlining this function is likely to allow
    // significant future optimization possibilities (like scalar promotion,
    // and scalarization), so encourage the inlining of the function.
    //
    if (isa<AllocaInst>(I))
      InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight;

    // If this is a constant being passed into the function, use the argument
    // weights calculated for the callee to determine how much will be folded
    // away with this information.
    else if (isa<Constant>(I))
      InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
  }

  const DenseMap<std::pair<unsigned, unsigned>, unsigned> &ArgPairWeights =
    CalleeFI->PointerArgPairWeights;
  for (DenseMap<std::pair<unsigned, unsigned>, unsigned>::const_iterator I =
         ArgPairWeights.begin(), E = ArgPairWeights.end();
       I != E; ++I)
    if (CS.getArgument(I->first.first)->stripInBoundsConstantOffsets() ==
        CS.getArgument(I->first.second)->stripInBoundsConstantOffsets())
      InlineCost -= I->second;

  // Each argument passed in has a cost at both the caller and the callee
  // sides.  Measurements show that each argument costs about the same as an
  // instruction.
  InlineCost -= (CS.arg_size() * InlineConstants::InstrCost);

  // Now that we have considered all of the factors that make the call site
  // more likely to be inlined, look at factors that make us not want to
  // inline it.

  // Calls usually take a long time, so they make the inlining gain smaller.
  InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;

  // Look at the size of the callee.  Each instruction counts as 5.
  InlineCost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;

  return InlineCost;
}
/// visitMemAlignCall - Handle memalign and posix_memalign.
///
void FuncTransform::visitMemAlignCall(CallSite CS) {
  Instruction *I = CS.getInstruction();
  Value *ResultDest = 0;
  Value *Align = 0;
  Value *Size = 0;
  Value *PH;

  Type *Int8Type = Type::getInt8Ty(CS.getInstruction()->getContext());
  Type *Int32Type = Type::getInt32Ty(CS.getInstruction()->getContext());

  if (CS.getCalledFunction()->getName() == "memalign") {
    Align = CS.getArgument(0);
    Size = CS.getArgument(1);
    PH = getPoolHandle(I);
  } else {
    assert(CS.getCalledFunction()->getName() == "posix_memalign");
    ResultDest = CS.getArgument(0);
    Align = CS.getArgument(1);
    Size = CS.getArgument(2);

    assert(0 && "posix_memalign not implemented fully!");

    // We need to get the pool descriptor corresponding to *ResultDest.
    PH = getPoolHandle(I);

    // Return success always.
    PointerType *PT = dyn_cast<PointerType>(I->getType());
    assert (PT && "memalign() does not return pointer type!\n");
    Value *RetVal = ConstantPointerNull::get(PT);
    I->replaceAllUsesWith(RetVal);

    static Type *PtrPtr =
      PointerType::getUnqual(PointerType::getUnqual(Int8Type));
    if (ResultDest->getType() != PtrPtr)
      ResultDest = CastInst::CreatePointerCast(ResultDest, PtrPtr,
                                               ResultDest->getName(), I);
  }

  if (!Align->getType()->isIntegerTy(32))
    Align = CastInst::CreateIntegerCast(Align, Int32Type, false,
                                        Align->getName(), I);
  if (!Size->getType()->isIntegerTy(32))
    Size = CastInst::CreateIntegerCast(Size, Int32Type, false,
                                       Size->getName(), I);

  std::string Name = I->getName();
  I->setName("");
  Value *Opts[3] = {PH, Align, Size};
  Instruction *V = CallInst::Create(PAInfo.PoolMemAlign, Opts, Name, I);

  Instruction *Casted = V;
  if (V->getType() != I->getType())
    Casted = CastInst::CreatePointerCast(V, I->getType(), V->getName(), I);

  if (ResultDest)
    new StoreInst(V, ResultDest, I);
  else
    I->replaceAllUsesWith(Casted);

  // If we are modifying the original function, update the DSGraph.
  if (!FI.Clone) {
    // V and Casted now point to whatever the original allocation did.
    G->getScalarMap().replaceScalar(I, V);
    if (V != Casted)
      G->getScalarMap()[Casted] = G->getScalarMap()[V];
  } else {
    // Otherwise, update the NewToOldValueMap
    UpdateNewToOldValueMap(I, V, V != Casted ? Casted : 0);
  }

  // If this was an invoke, fix up the CFG.
  if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
    BranchInst::Create (II->getNormalDest(), I);
    II->getUnwindDest()->removePredecessor(II->getParent(), true);
  }

  // Remove old allocation instruction.
  I->eraseFromParent();
}
void MemoryInstrumenter::instrumentMalloc(const CallSite &CS) {
  TargetData &TD = getAnalysis<TargetData>();

  Function *Callee = CS.getCalledFunction();
  assert(DynAAUtils::IsMalloc(Callee));

  Instruction *Ins = CS.getInstruction();

  // Calculate where to insert.
  // <Loc> will be the next instruction executed.
  BasicBlock::iterator Loc;
  if (!Ins->isTerminator()) {
    Loc = Ins;
    ++Loc;
  } else {
    assert(isa<InvokeInst>(Ins));
    Loc = cast<InvokeInst>(Ins)->getNormalDest()->getFirstNonPHI();
  }

  IRBuilder<> Builder(Loc);
  Value *Start = NULL;
  Value *Size = NULL;
  Value *Success = NULL;  // Indicate whether the allocation succeeded.

  StringRef CalleeName = Callee->getName();
  if (CalleeName == "malloc" || CalleeName == "valloc") {
    Start = Ins;
    Size = UndefValue::get(LongType);
    Success = Builder.CreateICmpNE(Ins,
                                   ConstantPointerNull::get(CharStarType));
  } else if (CalleeName.startswith("_Zn")) {
    Start = Ins;
    Size = CS.getArgument(0);
  } else if (CalleeName == "calloc") {
    // calloc() takes two size_t, i.e. i64.
    // Therefore, no need to worry Mul will have two operands with different
    // types. Also, Size will always be of type i64.
    Start = Ins;
    assert(CS.getArgument(0)->getType() == LongType);
    assert(CS.getArgument(1)->getType() == LongType);
    Size = BinaryOperator::Create(Instruction::Mul,
                                  CS.getArgument(0),
                                  CS.getArgument(1),
                                  "",
                                  Loc);
    Success = Builder.CreateICmpNE(Ins,
                                   ConstantPointerNull::get(CharStarType));
  } else if (CalleeName == "memalign" || CalleeName == "realloc") {
    Start = Ins;
    Size = CS.getArgument(1);
    Success = Builder.CreateICmpNE(Ins,
                                   ConstantPointerNull::get(CharStarType));
  } else if (CalleeName == "strdup" || CalleeName == "__strdup") {
    Start = Ins;
    // Use strlen to compute the length of the allocated memory.
    Value *StrLen = EmitStrLen(Ins, Builder, &TD);
    // size = strlen(result) + 1
    Size = Builder.CreateAdd(StrLen, ConstantInt::get(LongType, 1));
    Success = Builder.CreateICmpNE(Ins,
                                   ConstantPointerNull::get(CharStarType));
  } else if (CalleeName == "getline") {
    // getline(char **lineptr, size_t *n, FILE *stream)
    // start = *lineptr
    // size = *n
    // succ = (<rv> != -1)
    Start = Builder.CreateLoad(CS.getArgument(0));
    Size = Builder.CreateLoad(CS.getArgument(1));
    Success = Builder.CreateICmpNE(Ins, ConstantInt::get(Ins->getType(), -1));
  } else {
    assert(false && "Unhandled malloc function call");
  }

  // start = malloc(size)
  // if (success)
  //   HookMemAlloc
  // Loc:
  instrumentMemoryAllocation(Start, Size, Success, Loc);
}
//
// Method: visitCallocCall()
//
// Description:
//  Transform a call to calloc() to use a pool-allocation version of calloc.
//  We do this because pool_calloc() must check for a NULL return value before
//  zeroing out the memory.
//
void FuncTransform::visitCallocCall (CallSite CS) {
  //
  // Ensure that the calloc call has the correct number of arguments.
  //
  assert(CS.arg_end()-CS.arg_begin() == 2 && "calloc takes two arguments!");

  //
  // Ensure that the new instruction has the same name as the old one.  This
  // is done by removing the name of the old instruction.
  //
  Instruction *I = CS.getInstruction();
  std::string Name = I->getName();
  I->setName("");

  Type *Int32Type = Type::getInt32Ty(CS.getInstruction()->getContext());

  // FIXME: Ensure that we use 32/64-bit object length sizes consistently
  // FIXME: Introduce 'ObjectAllocationSize' variable
  //        or similar instead of repeatedly using same expression
  Value *V1 = CS.getArgument(0);
  Value *V2 = CS.getArgument(1);
  V1 = CastInst::CreateIntegerCast(V1, Int32Type, false, V1->getName(), I);
  V2 = CastInst::CreateIntegerCast(V2, Int32Type, false, V2->getName(), I);

  // Get the pool handle.
  // Do not change the instruction into a poolalloc() call unless we have a
  // real pool descriptor.
  Value *PH = getPoolHandle(CS.getInstruction());
  if (PH == 0 || isa<ConstantPointerNull>(PH))
    return;

  //
  // Create call to poolcalloc, and record the use of the pool
  //
  Value *Opts[3] = {PH, V1, V2};
  Instruction *V = CallInst::Create(PAInfo.PoolCalloc, Opts, Name, I);
  AddPoolUse(*V, PH, PoolUses);

  // Cast to the appropriate type if necessary
  // FIXME: Make use of "castTo" utility function
  Instruction *Casted = V;
  if (V->getType() != I->getType())
    Casted = CastInst::CreatePointerCast(V, I->getType(), V->getName(), I);

  // Update def-use info
  I->replaceAllUsesWith(Casted);

  // If we are modifying the original function, update the DSGraph.
  if (!FI.Clone) {
    // V and Casted now point to whatever the original allocation did.
    G->getScalarMap().replaceScalar(I, V);
    if (V != Casted)
      G->getScalarMap()[Casted] = G->getScalarMap()[V];
  } else {
    // Otherwise, update the NewToOldValueMap
    UpdateNewToOldValueMap(I, V, V != Casted ? Casted : 0);
  }

  // If this was an invoke, fix up the CFG.
  if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
    // FIXME: Assert out since we potentially don't handle "invoke" correctly
    BranchInst::Create (II->getNormalDest(), I);
    II->getUnwindDest()->removePredecessor(II->getParent(), true);
  }

  // Remove old allocation instruction.
  I->eraseFromParent();
}
///
/// Method: visitIntrinsic()
///
/// Description:
///  Generate correct DSNodes for calls to LLVM intrinsic functions.
///
/// Inputs:
///  CS - The CallSite representing the call or invoke to the intrinsic.
///  F  - A pointer to the function called by the call site.
///
/// Return value:
///  true  - This intrinsic is properly handled by this method.
///  false - This intrinsic is not recognized by DSA.
///
bool GraphBuilder::visitIntrinsic(CallSite CS, Function *F) {
  ++NumIntrinsicCall;

  //
  // If this is a debug intrinsic, then don't do any special processing.
  //
  if (isa<DbgInfoIntrinsic>(CS.getInstruction()))
    return true;

  switch (F->getIntrinsicID()) {
  case Intrinsic::vastart: {
    visitVAStartInst(CS);
    return true;
  }
  case Intrinsic::vacopy: {
    // Simply merge the two arguments to va_copy.
    // This results in loss of precision on the temporaries used to manipulate
    // the va_list, and so isn't a big deal.  In theory we would build a
    // separate graph for this (like the one created in visitVAStartNode)
    // and only merge the node containing the variable arguments themselves.
    DSNodeHandle destNH = getValueDest(CS.getArgument(0));
    DSNodeHandle srcNH = getValueDest(CS.getArgument(1));
    destNH.mergeWith(srcNH);
    return true;
  }
  case Intrinsic::stacksave: {
    DSNode *Node = createNode();
    Node->setAllocaMarker()->setIncompleteMarker()->setUnknownMarker();
    Node->foldNodeCompletely();
    setDestTo (*(CS.getInstruction()), Node);
    return true;
  }
  case Intrinsic::stackrestore:
    getValueDest(CS.getInstruction()).getNode()->setAllocaMarker()
                                               ->setIncompleteMarker()
                                               ->setUnknownMarker()
                                               ->foldNodeCompletely();
    return true;
  case Intrinsic::vaend:
  case Intrinsic::memcpy:
  case Intrinsic::memmove: {
    // Merge the first & second arguments, and mark the memory read and
    // modified.
    DSNodeHandle RetNH = getValueDest(*CS.arg_begin());
    RetNH.mergeWith(getValueDest(*(CS.arg_begin()+1)));
    if (DSNode *N = RetNH.getNode())
      N->setModifiedMarker()->setReadMarker();
    return true;
  }
  case Intrinsic::memset:
    // Mark the memory modified.
    if (DSNode *N = getValueDest(*CS.arg_begin()).getNode())
      N->setModifiedMarker();
    return true;
  case Intrinsic::eh_exception: {
    DSNode *Node = createNode();
    Node->setIncompleteMarker();
    Node->foldNodeCompletely();
    setDestTo (*(CS.getInstruction()), Node);
    return true;
  }
  case Intrinsic::eh_selector: {
    for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
         I != E; ++I) {
      if (isa<PointerType>((*I)->getType())) {
        DSNodeHandle Ptr = getValueDest(*I);
        if (Ptr.getNode()) {
          Ptr.getNode()->setReadMarker();
          Ptr.getNode()->setIncompleteMarker();
        }
      }
    }
    return true;
  }
  case Intrinsic::eh_typeid_for: {
    DSNodeHandle Ptr = getValueDest(*CS.arg_begin());
    Ptr.getNode()->setReadMarker();
    Ptr.getNode()->setIncompleteMarker();
    return true;
  }
  case Intrinsic::prefetch:
    return true;
  case Intrinsic::objectsize:
    return true;

  //
  // The return address/frame address aliases with the stack, is type-unknown,
  // and should have the unknown flag set since we don't know where it goes.
  //
  case Intrinsic::returnaddress:
  case Intrinsic::frameaddress: {
    DSNode *Node = createNode();
    Node->setAllocaMarker()->setIncompleteMarker()->setUnknownMarker();
    Node->foldNodeCompletely();
    setDestTo (*(CS.getInstruction()), Node);
    return true;
  }

  // Process lifetime intrinsics
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
    return true;

  default: {
    // Ignore pointer-free intrinsics.
    if (!isa<PointerType>(F->getReturnType())) {
      bool hasPtr = false;
      for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
           I != E && !hasPtr; ++I)
        if (isa<PointerType>(I->getType()))
          hasPtr = true;
      if (!hasPtr)
        return true;
    }

    DEBUG(errs() << "[dsa:local] Unhandled intrinsic: "
                 << F->getName() << "\n");
    assert(0 && "Unhandled intrinsic");
    return false;
  }
  }
}
void HeterotbbTransform::rewrite_invoke_site(Module &M, CallSite &CS,
                                             Function *NF, int type) {
  // create_hetero_clone_void(f);
  Instruction *OldCall = CS.getInstruction();
  Instruction *NewCall;           // New Call Instruction created
  SmallVector<Value*, 16> Args;   // Argument lists to the new call

  //DEBUG(dbgs() << "Old Call Instruction:");
  //OldCall->dump();

  // Any attributes (parameter attribute list PAL) of the
  // parallel_for_hetero.
#if defined(LLVM_3_2)
  SmallVector<AttributeWithIndex, 8> AttrVec; // Attributes list to the new call
  const AttrListPtr &OldCallPAL = CS.getAttributes();
  // Add any return attributes.
  Attributes attrs = OldCallPAL.getRetAttributes();
  if (attrs.hasAttributes())
    AttrVec.push_back(AttributeWithIndex::get(0, attrs));
#else
  SmallVector<AttributeSet, 8> AttrVec;
  const AttributeSet &OldCallPAL = CS.getAttributes();
  // Add any return attributes.
  if (OldCallPAL.hasAttributes(AttributeSet::ReturnIndex))
    AttrVec.push_back(AttributeSet::get(NF->getContext(),
                                        OldCallPAL.getRetAttributes()));
#endif

  CallSite::arg_iterator AI = CS.arg_begin();
  Args.push_back(CS.getArgument(0)); // num_iters
  //Args.push_back(CS.getArgument(1));
  //params.push_back(CS.getArgument(1)->getType());

  // Create a new cast from class_name to i8* before the old instruction
  // site.
  CastInst *StrucCast = CastInst::Create(Instruction::BitCast,
      CS.getArgument(1),
      PointerType::get(Type::getInt8Ty(M.getContext()), 0),
      "temp_cast", OldCall);
  // Push the cast into the argument list.
  Args.push_back(StrucCast); // struct
  // Push the function as the third argument.
  Args.push_back(NF);
  //NF->dump();
  //NF->getType()->dump();
  //Args.push_back(CS.getArgument(2));

  vector</*const*/ Type *> params;
  const FunctionType *FTy = NF->getFunctionType();
  //#ifndef IVB_64
  params.push_back(Type::getInt32Ty(M.getContext()));
  /*#else
  params.push_back(Type::getInt64Ty(M.getContext()));
  #endif*/
  params.push_back(PointerType::get(Type::getInt8Ty(M.getContext()), 0));
  params.push_back(NF->getType());
  //params.push_back(Type::getInt32Ty(M.getContext()));

  /*const*/ Type *RetTy = FTy->getReturnType();
  FunctionType *NFty = FunctionType::get(RetTy, params, false);
  //NF->getType()->dump();
  //NFty->dump();

  Constant *hetero_f_const;
  //if (hetero_f_const == NULL) {
  hetero_f_const = /*cast<Function>*/(M.getOrInsertFunction("offload", NFty));
  //}
  //hetero_f_const->dump();

  NewCall = InvokeInst::Create(hetero_f_const,
                               cast<InvokeInst>(OldCall)->getNormalDest(),
                               cast<InvokeInst>(OldCall)->getUnwindDest(),
                               Args, "", OldCall);
  cast<InvokeInst>(NewCall)->setCallingConv(CS.getCallingConv());
  //cast<InvokeInst>(NewCall)->setAttributes(AttrListPtr::get(NF->getContext(), AttrVec));
  cast<InvokeInst>(NewCall)->setAttributes(
      AttributeSet::get(NF->getContext(), AttrVec));
  //NewCall->dump();
  //NewCall = CallInst::Create(hetero_f_const, Args.begin(), Args.end(), "", OldCall);
  //NewCall->dump();
  //cast<CallInst>(NewCall)->setCallingConv(CS.getCallingConv());
  //cast<CallInst>(NewCall)->setAttributes(AttrListPtr::get(AttrVec.begin(), AttrVec.end()));
  //if (CallInst *c=dyn_cast<CallInst>(OldCall)) {
  //  if (c->isTailCall()) cast<CallInst>(NewCall)->setTailCall();
  //}

  char buf[32];
  ConstantInt *ci;
  //DEBUG(dbgs() << "original scheduler_hint=");
  //CS.getArgument(2)->dump();
  if ((ci = dyn_cast<ConstantInt>(CS.getArgument(2)))) {
    DEBUG(dbgs() << "scheduler_hint=" << ci->getZExtValue());
    sprintf(buf, "%llu", (unsigned long long)ci->getZExtValue());
  } else {
    DEBUG(dbgs() << "scheduler_hint is not supplied and assumed 0");
    sprintf(buf, "%d", 0);
  }
  Value *e2[] = {MDString::get(M.getContext(), buf)};
  MDNode *n2 = MDNode::get(M.getContext(), e2);
  NewCall->setMetadata("scheduler_hint", n2);

  if (type == 2) {
    // Add metadata for reduction.
    Function *join = get_join_func(M, CS);
    Value *Elts[] = {MDString::get(M.getContext(), join->getName())};
    MDNode *Node = MDNode::get(M.getContext(), Elts);
    NewCall->setMetadata("join_cpu", Node);

    Function *Njoin = create_new_join(M, join);
    Value *Elts1[] = {MDString::get(M.getContext(), Njoin->getName())};
    MDNode *Node1 = MDNode::get(M.getContext(), Elts1);
    NewCall->setMetadata("join_gpu", Node1);

    char buffer[32];
    sprintf(buffer, "%d", object_sizes[NF]);
    Value *Elts2[] = {MDString::get(M.getContext(), buffer)};
    MDNode *Node2 = MDNode::get(M.getContext(), Elts2);
    NewCall->setMetadata("object_size", Node2);
  }
  //NewCall->stripPointerCasts();
  //DEBUG(dbgs() << "Newly created instruction:");
  //NewCall->dump();
}
SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) {
  Optional<AllocFnsTy> FnData = getAllocationSize(CS.getInstruction(), TLI);
  if (!FnData)
    return unknown();

  // Handle strdup-like functions separately.
  if (FnData->AllocTy == StrDupLike) {
    APInt Size(IntTyBits, GetStringLength(CS.getArgument(0)));
    if (!Size)
      return unknown();

    // Strndup limits strlen.
    if (FnData->FstParam > 0) {
      ConstantInt *Arg =
          dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam));
      if (!Arg)
        return unknown();

      APInt MaxSize = Arg->getValue().zextOrSelf(IntTyBits);
      if (Size.ugt(MaxSize))
        Size = MaxSize + 1;
    }
    return std::make_pair(Size, Zero);
  }

  ConstantInt *Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->FstParam));
  if (!Arg)
    return unknown();

  // When we're compiling N-bit code, and the user uses parameters that are
  // greater than N bits (e.g. uint64_t on a 32-bit build), we can run into
  // trouble with APInt size issues. This function handles resizing + overflow
  // checks for us.
  auto CheckedZextOrTrunc = [&](APInt &I) {
    // More bits than we can handle. Checking the bit width isn't necessary,
    // but it's faster than checking active bits, and should give `false` in
    // the vast majority of cases.
    if (I.getBitWidth() > IntTyBits && I.getActiveBits() > IntTyBits)
      return false;
    if (I.getBitWidth() != IntTyBits)
      I = I.zextOrTrunc(IntTyBits);
    return true;
  };

  APInt Size = Arg->getValue();
  if (!CheckedZextOrTrunc(Size))
    return unknown();

  // Size is determined by just 1 parameter.
  if (FnData->SndParam < 0)
    return std::make_pair(Size, Zero);

  Arg = dyn_cast<ConstantInt>(CS.getArgument(FnData->SndParam));
  if (!Arg)
    return unknown();

  APInt NumElems = Arg->getValue();
  if (!CheckedZextOrTrunc(NumElems))
    return unknown();

  bool Overflow;
  Size = Size.umul_ov(NumElems, Overflow);
  return Overflow ? unknown() : std::make_pair(Size, Zero);

  // TODO: handle more standard functions (+ wchar cousins):
  // - strdup / strndup
  // - strcpy / strncpy
  // - strcat / strncat
  // - memcpy / memmove
  // - memset
}
void Lint::visitCallSite(CallSite CS) {
  Instruction &I = *CS.getInstruction();
  Value *Callee = CS.getCalledValue();

  visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr,
                       MemRef::Callee);

  if (Function *F = dyn_cast<Function>(findValue(Callee,
                                                 /*OffsetOk=*/false))) {
    Assert(CS.getCallingConv() == F->getCallingConv(),
           "Undefined behavior: Caller and callee calling convention differ",
           &I);

    FunctionType *FT = F->getFunctionType();
    unsigned NumActualArgs = CS.arg_size();

    Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
                          : FT->getNumParams() == NumActualArgs,
           "Undefined behavior: Call argument count mismatches callee "
           "argument count",
           &I);

    Assert(FT->getReturnType() == I.getType(),
           "Undefined behavior: Call return type mismatches "
           "callee return type",
           &I);

    // Check argument types (in case the callee was casted) and attributes.
    // TODO: Verify that caller and callee attributes are compatible.
    Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
    CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
    for (; AI != AE; ++AI) {
      Value *Actual = *AI;
      if (PI != PE) {
        Argument *Formal = &*PI++;
        Assert(Formal->getType() == Actual->getType(),
               "Undefined behavior: Call argument type mismatches "
               "callee parameter type",
               &I);

        // Check that noalias arguments don't alias other arguments. This is
        // not fully precise because we don't know the sizes of the
        // dereferenced memory regions.
        if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
            if (AI != BI && (*BI)->getType()->isPointerTy()) {
              AliasResult Result = AA->alias(*AI, *BI);
              Assert(Result != MustAlias && Result != PartialAlias,
                     "Unusual: noalias argument aliases another argument",
                     &I);
            }

        // Check that an sret argument points to valid memory.
        if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
          Type *Ty = cast<PointerType>(Formal->getType())->getElementType();
          visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty),
                               DL->getABITypeAlignment(Ty), Ty,
                               MemRef::Read | MemRef::Write);
        }
      }
    }
  }

  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
    for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
         AI != AE; ++AI) {
      Value *Obj = findValue(*AI, /*OffsetOk=*/true);
      Assert(!isa<AllocaInst>(Obj),
             "Undefined behavior: Call with \"tail\" keyword references "
             "alloca",
             &I);
    }

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
    switch (II->getIntrinsicID()) {
    default:
      break;

    // TODO: Check more intrinsics

    case Intrinsic::memcpy: {
      MemCpyInst *MCI = cast<MemCpyInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize,
                           MCI->getAlignment(), nullptr, MemRef::Write);
      visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize,
                           MCI->getAlignment(), nullptr, MemRef::Read);

      // Check that the memcpy arguments don't overlap. The AliasAnalysis API
      // isn't expressive enough for what we really want to do. Known partial
      // overlap is not distinguished from the case where nothing is known.
      uint64_t Size = 0;
      if (const ConstantInt *Len =
              dyn_cast<ConstantInt>(findValue(MCI->getLength(),
                                              /*OffsetOk=*/false)))
        if (Len->getValue().isIntN(32))
          Size = Len->getValue().getZExtValue();
      Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
                 MustAlias,
             "Undefined behavior: memcpy source and destination overlap", &I);
      break;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MMI = cast<MemMoveInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize,
                           MMI->getAlignment(), nullptr, MemRef::Write);
      visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize,
                           MMI->getAlignment(), nullptr, MemRef::Read);
      break;
    }
    case Intrinsic::memset: {
      MemSetInst *MSI = cast<MemSetInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize,
                           MSI->getAlignment(), nullptr, MemRef::Write);
      break;
    }

    case Intrinsic::vastart:
      Assert(I.getParent()->getParent()->isVarArg(),
             "Undefined behavior: va_start called in a non-varargs function",
             &I);
      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize,
                           0, nullptr, MemRef::Read | MemRef::Write);
      break;
    case Intrinsic::vacopy:
      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize,
                           0, nullptr, MemRef::Write);
      visitMemoryReference(I, CS.getArgument(1), MemoryLocation::UnknownSize,
                           0, nullptr, MemRef::Read);
      break;
    case Intrinsic::vaend:
      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize,
                           0, nullptr, MemRef::Read | MemRef::Write);
      break;
    case Intrinsic::stackrestore:
      // Stackrestore doesn't read or write memory, but it sets the
      // stack pointer, which the compiler may read from or write to
      // at any time, so check it for both readability and writeability.
      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize,
                           0, nullptr, MemRef::Read | MemRef::Write);
      break;
    }
}
/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
  // The general transformation to keep in mind is
  //
  //   call @func(..., src, ...)
  //   memcpy(dest, src, ...)
  //
  // ->
  //
  //   memcpy(dest, src, ...)
  //   call @func(..., dest, ...)
  //
  // Since moving the memcpy is technically awkward, we additionally check
  // that src only holds uninitialized values at the moment of the call,
  // meaning that the memcpy can be discarded rather than moved.

  // Deliberately get the source and destination with bitcasts stripped away,
  // because we'll need to do type comparisons based on the underlying type.
  Value *cpyDest = cpy->getDest();
  Value *cpySrc = cpy->getSource();
  CallSite CS = CallSite::get(C);

  // We need to be able to reason about the size of the memcpy, so we require
  // that it be a constant.
  ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength());
  if (!cpyLength)
    return false;

  // Require that src be an alloca.  This simplifies the reasoning
  // considerably.
  AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
  if (!srcAlloca)
    return false;

  // Check that all of src is copied to dest.
  TargetData *TD = getAnalysisIfAvailable<TargetData>();
  if (!TD)
    return false;

  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
  if (!srcArraySize)
    return false;

  uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
    srcArraySize->getZExtValue();

  if (cpyLength->getZExtValue() < srcSize)
    return false;

  // Check that accessing the first srcSize bytes of dest will not cause a
  // trap.  Otherwise the transform is invalid since it might cause a trap
  // to occur earlier than it otherwise would.
  if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
    // The destination is an alloca.  Check it is larger than srcSize.
    ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
    if (!destArraySize)
      return false;

    uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) *
      destArraySize->getZExtValue();

    if (destSize < srcSize)
      return false;
  } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
    // If the destination is an sret parameter then only accesses that are
    // outside of the returned struct type can trap.
    if (!A->hasStructRetAttr())
      return false;

    const Type *StructTy = cast<PointerType>(A->getType())->getElementType();
    uint64_t destSize = TD->getTypeAllocSize(StructTy);

    if (destSize < srcSize)
      return false;
  } else {
    return false;
  }

  // Check that src is not accessed except via the call and the memcpy.  This
  // guarantees that it holds only undefined values when passed in (so the
  // final memcpy can be dropped), that it is not read or written between the
  // call and the memcpy, and that writing beyond the end of it is undefined.
  SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
                                   srcAlloca->use_end());
  while (!srcUseList.empty()) {
    User *UI = srcUseList.pop_back_val();

    if (isa<BitCastInst>(UI)) {
      for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
           I != E; ++I)
        srcUseList.push_back(*I);
    } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) {
      if (G->hasAllZeroIndices())
        for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
             I != E; ++I)
          srcUseList.push_back(*I);
      else
        return false;
    } else if (UI != C && UI != cpy) {
      return false;
    }
  }

  // Since we're changing the parameter to the callsite, we need to make sure
  // that what would be the new parameter dominates the callsite.
  DominatorTree &DT = getAnalysis<DominatorTree>();
  if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
    if (!DT.dominates(cpyDestInst, C))
      return false;

  // In addition to knowing that the call does not access src in some
  // unexpected manner, for example via a global, which we deduce from
  // the use analysis, we also need to know that it does not sneakily
  // access dest.  We rely on AA to figure this out for us.
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) !=
      AliasAnalysis::NoModRef)
    return false;

  // All the checks have passed, so do the transformation.
  bool changedArgument = false;
  for (unsigned i = 0; i < CS.arg_size(); ++i)
    if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
      if (cpySrc->getType() != cpyDest->getType())
        cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
                                              cpyDest->getName(), C);

      changedArgument = true;
      if (CS.getArgument(i)->getType() == cpyDest->getType())
        CS.setArgument(i, cpyDest);
      else
        CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,
                          CS.getArgument(i)->getType(), cpyDest->getName(),
                          C));
    }

  if (!changedArgument)
    return false;

  // Drop any cached information about the call, because we may have changed
  // its dependence information by changing its parameter.
  MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
  MD.removeInstruction(C);

  // Remove the memcpy
  MD.removeInstruction(cpy);
  cpy->eraseFromParent();

  ++NumMemCpyInstr;
  return true;
}
/// \brief Analyze a call site for potential inlining.
///
/// Returns true if inlining this call is viable, and false if it is not
/// viable. It computes the cost and adjusts the threshold based on numerous
/// factors and heuristics. If this method returns false but the computed cost
/// is below the computed threshold, then inlining was forcibly disabled by
/// some artifact of the routine.
bool CallAnalyzer::analyzeCall(CallSite CS) {
  ++NumCallsAnalyzed;

  // Track whether the post-inlining function would have more than one basic
  // block. A single basic block is often intended for inlining. Balloon the
  // threshold by 50% until we pass the single-BB phase.
  bool SingleBB = true;
  int SingleBBBonus = Threshold / 2;
  Threshold += SingleBBBonus;

  // Perform some tweaks to the cost and threshold based on the direct
  // callsite information.

  // We want to more aggressively inline vector-dense kernels, so up the
  // threshold, and we'll lower it if the % of vector instructions gets too
  // low.
  assert(NumInstructions == 0);
  assert(NumVectorInstructions == 0);
  FiftyPercentVectorBonus = Threshold;
  TenPercentVectorBonus = Threshold / 2;

  // Give out bonuses per argument, as the instructions setting them up will
  // be gone after inlining.
  for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) {
    if (TD && CS.isByValArgument(I)) {
      // We approximate the number of loads and stores needed by dividing the
      // size of the byval type by the target's pointer size.
      PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
      unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType());
      unsigned PointerSize = TD->getPointerSizeInBits();
      // Ceiling division.
      unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;

      // If it generates more than 8 stores it is likely to be expanded as an
      // inline memcpy so we take that as an upper bound. Otherwise we assume
      // one load and one store per word copied.
      // FIXME: The maxStoresPerMemcpy setting from the target should be used
      // here instead of a magic number of 8, but it's not available via
      // DataLayout.
      NumStores = std::min(NumStores, 8U);

      Cost -= 2 * NumStores * InlineConstants::InstrCost;
    } else {
      // For non-byval arguments subtract off one instruction per call
      // argument.
      Cost -= InlineConstants::InstrCost;
    }
  }

  // If there is only one call of the function, and it has internal linkage,
  // the cost of inlining it drops dramatically.
  bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
    &F == CS.getCalledFunction();
  if (OnlyOneCallAndLocalLinkage)
    Cost += InlineConstants::LastCallToStaticBonus;

  // If the instruction after the call, or if the normal destination of the
  // invoke is an unreachable instruction, the function is noreturn. As such,
  // there is little point in inlining this unless there is literally zero
  // cost.
  Instruction *Instr = CS.getInstruction();
  if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
    if (isa<UnreachableInst>(II->getNormalDest()->begin()))
      Threshold = 1;
  } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
    Threshold = 1;

  // If this function uses the coldcc calling convention, prefer not to inline
  // it.
  if (F.getCallingConv() == CallingConv::Cold)
    Cost += InlineConstants::ColdccPenalty;

  // Check if we're done. This can happen due to bonuses and penalties.
  if (Cost > Threshold)
    return false;

  if (F.empty())
    return true;

  Function *Caller = CS.getInstruction()->getParent()->getParent();
  // Check if the caller function is recursive itself.
  for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end();
       U != E; ++U) {
    CallSite Site(cast<Value>(*U));
    if (!Site)
      continue;
    Instruction *I = Site.getInstruction();
    if (I->getParent()->getParent() == Caller) {
      IsCallerRecursive = true;
      break;
    }
  }

  // Track whether we've seen a return instruction. The first return
  // instruction is free, as at least one will usually disappear in inlining.
  bool HasReturn = false;

  // Populate our simplified values by mapping from function arguments to call
  // arguments with known important simplifications.
  CallSite::arg_iterator CAI = CS.arg_begin();
  for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
       FAI != FAE; ++FAI, ++CAI) {
    assert(CAI != CS.arg_end());
    if (Constant *C = dyn_cast<Constant>(CAI))
      SimplifiedValues[FAI] = C;

    Value *PtrArg = *CAI;
    if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
      ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());

      // We can SROA any pointer arguments derived from alloca instructions.
      if (isa<AllocaInst>(PtrArg)) {
        SROAArgValues[FAI] = PtrArg;
        SROAArgCosts[PtrArg] = 0;
      }
    }
  }
  NumConstantArgs = SimplifiedValues.size();
  NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
  NumAllocaArgs = SROAArgValues.size();

  // The worklist of live basic blocks in the callee *after* inlining. We
  // avoid adding basic blocks of the callee which can be proven to be dead
  // for this particular call site in order to get more accurate cost
  // estimates. This requires a somewhat heavyweight iteration pattern: we
  // need to walk the basic blocks in a breadth-first order as we insert live
  // successors. To accomplish this, prioritizing for small iterations because
  // we exit after crossing our threshold, we use a small-size optimized
  // SetVector.
  typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
                    SmallPtrSet<BasicBlock *, 16> > BBSetVector;
  BBSetVector BBWorklist;
  BBWorklist.insert(&F.getEntryBlock());
  // Note that we *must not* cache the size, this loop grows the worklist.
  for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
    // Bail out the moment we cross the threshold. This means we'll
    // under-count the cost, but only when undercounting doesn't matter.
    if (Cost > (Threshold + VectorBonus))
      break;

    BasicBlock *BB = BBWorklist[Idx];
    if (BB->empty())
      continue;

    // Handle the terminator cost here where we can track returns and other
    // function-wide constructs.
    TerminatorInst *TI = BB->getTerminator();

    // We never want to inline functions that contain an indirectbr. This is
    // incorrect because all the blockaddress's (in static global initializers
    // for example) would be referring to the original function, and this
    // indirect jump would jump from the inlined copy of the function into the
    // original function which is extremely undefined behavior.
    // FIXME: This logic isn't really right; we can safely inline functions
    // with indirectbr's as long as no other function or global references the
    // blockaddress of a block within the current function. And as a QOI
    // issue, if someone is using a blockaddress without an indirectbr, and
    // that reference somehow ends up in another function or global, we
    // probably don't want to inline this function.
    if (isa<IndirectBrInst>(TI))
      return false;

    if (!HasReturn && isa<ReturnInst>(TI))
      HasReturn = true;
    else
      Cost += InlineConstants::InstrCost;

    // Analyze the cost of this block. If we blow through the threshold, this
    // returns false, and we can bail out.
    if (!analyzeBlock(BB)) {
      if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
        return false;

      // If the caller is a recursive function then we don't want to inline
      // functions which allocate a lot of stack space because it would
      // increase the caller stack usage dramatically.
      if (IsCallerRecursive &&
          AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller)
        return false;

      break;
    }

    // Add in the live successors by first checking whether we have a
    // terminator that may be simplified based on the values simplified by
    // this call.
    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (BI->isConditional()) {
        Value *Cond = BI->getCondition();
        if (ConstantInt *SimpleCond
              = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
          BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
          continue;
        }
      }
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
      Value *Cond = SI->getCondition();
      if (ConstantInt *SimpleCond
            = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
        BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
        continue;
      }
    }

    // If we're unable to select a particular successor, just count all of
    // them.
    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
         ++TIdx)
      BBWorklist.insert(TI->getSuccessor(TIdx));

    // If we had any successors at this point, then post-inlining is likely
    // to have them as well. Note that we assume any basic blocks which
    // existed due to branches or switches which folded above will also fold
    // after inlining.
    if (SingleBB && TI->getNumSuccessors() > 1) {
      // Take off the bonus we applied to the threshold.
      Threshold -= SingleBBBonus;
      SingleBB = false;
    }
  }

  // If this is a noduplicate call, we can still inline as long as
  // inlining this would cause the removal of the caller (so the instruction
  // is not actually duplicated, just moved).
  if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
    return false;

  Threshold += VectorBonus;

  return Cost < Threshold;
}
void Lint::visitCallSite(CallSite CS) {
  Instruction &I = *CS.getInstruction();
  Value *Callee = CS.getCalledValue();

  // TODO: Check function alignment?
  visitMemoryReference(I, Callee, 0, 0);

  if (Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) {
    Assert1(CS.getCallingConv() == F->getCallingConv(),
            "Undefined behavior: Caller and callee calling convention differ",
            &I);

    const FunctionType *FT = F->getFunctionType();
    unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());

    Assert1(FT->isVarArg() ?
              FT->getNumParams() <= NumActualArgs :
              FT->getNumParams() == NumActualArgs,
            "Undefined behavior: Call argument count mismatches callee "
            "argument count", &I);

    // TODO: Check argument types (in case the callee was casted)

    // TODO: Check ABI-significant attributes.

    // TODO: Check noalias attribute.

    // TODO: Check sret attribute.
  }

  // TODO: Check the "tail" keyword constraints.

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
    switch (II->getIntrinsicID()) {
    default: break;

    // TODO: Check more intrinsics

    case Intrinsic::memcpy: {
      MemCpyInst *MCI = cast<MemCpyInst>(&I);
      visitMemoryReference(I, MCI->getSource(), MCI->getAlignment(), 0);
      visitMemoryReference(I, MCI->getDest(), MCI->getAlignment(), 0);

      // Check that the memcpy arguments don't overlap. The AliasAnalysis API
      // isn't expressive enough for what we really want to do. Known partial
      // overlap is not distinguished from the case where nothing is known.
      unsigned Size = 0;
      if (const ConstantInt *Len =
            dyn_cast<ConstantInt>(MCI->getLength()->stripPointerCasts()))
        if (Len->getValue().isIntN(32))
          Size = Len->getValue().getZExtValue();
      Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
              AliasAnalysis::MustAlias,
              "Undefined behavior: memcpy source and destination overlap",
              &I);
      break;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MMI = cast<MemMoveInst>(&I);
      visitMemoryReference(I, MMI->getSource(), MMI->getAlignment(), 0);
      visitMemoryReference(I, MMI->getDest(), MMI->getAlignment(), 0);
      break;
    }
    case Intrinsic::memset: {
      MemSetInst *MSI = cast<MemSetInst>(&I);
      visitMemoryReference(I, MSI->getDest(), MSI->getAlignment(), 0);
      break;
    }

    case Intrinsic::vastart:
      Assert1(I.getParent()->getParent()->isVarArg(),
              "Undefined behavior: va_start called in a non-varargs function",
              &I);
      visitMemoryReference(I, CS.getArgument(0), 0, 0);
      break;
    case Intrinsic::vacopy:
      visitMemoryReference(I, CS.getArgument(0), 0, 0);
      visitMemoryReference(I, CS.getArgument(1), 0, 0);
      break;
    case Intrinsic::vaend:
      visitMemoryReference(I, CS.getArgument(0), 0, 0);
      break;
    case Intrinsic::stackrestore:
      visitMemoryReference(I, CS.getArgument(0), 0, 0);
      break;
    }
}
//
// Method: visitCallSite()
//
// Description:
//  This method transforms a call site. A call site may either be a call
//  instruction or an invoke instruction.
//
// Inputs:
//  CS - The call site representing the instruction that should be transformed.
//
void FuncTransform::visitCallSite(CallSite& CS) {
  const Function *CF = CS.getCalledFunction();
  Instruction *TheCall = CS.getInstruction();
  bool thread_creation_point = false;

  //
  // Get the value that is called at this call site. Strip away any pointer
  // casts that do not change the representation of the data (i.e., are
  // lossless casts).
  //
  Value * CalledValue = CS.getCalledValue()->stripPointerCasts();

  //
  // The CallSite::getCalledFunction() method is not guaranteed to strip off
  // pointer casts. If no called function was found, manually strip pointer
  // casts off of the called value and see if we get a function. If so, this
  // is a direct call, and we want to update CF accordingly.
  //
  if (!CF) CF = dyn_cast<Function>(CalledValue);

  //
  // Do not change any inline assembly code.
  //
  if (isa<InlineAsm>(TheCall->getOperand(0))) {
    errs() << "INLINE ASM: ignoring. Hoping that's safe.\n";
    return;
  }

  //
  // Ignore calls to NULL pointers or undefined values.
  //
  if ((isa<ConstantPointerNull>(CalledValue)) ||
      (isa<UndefValue>(CalledValue))) {
    errs() << "WARNING: Ignoring call using NULL/Undef function pointer.\n";
    return;
  }

  // If this function is one of the memory manipulating functions built into
  // libc, emulate it with pool calls as appropriate.
  if (CF && CF->isDeclaration()) {
    std::string Name = CF->getName();
    if (Name == "free" || Name == "cfree") {
      visitFreeCall(CS);
      return;
    } else if (Name == "malloc") {
      visitMallocCall(CS);
      return;
    } else if (Name == "calloc") {
      visitCallocCall(CS);
      return;
    } else if (Name == "realloc") {
      visitReallocCall(CS);
      return;
    } else if (Name == "memalign" || Name == "posix_memalign") {
      visitMemAlignCall(CS);
      return;
    } else if (Name == "strdup") {
      visitStrdupCall(CS);
      return;
    } else if (Name == "valloc") {
      errs() << "VALLOC USED BUT NOT HANDLED!\n";
      abort();
    } else if (unsigned PoolArgc = PAInfo.getNumInitialPoolArguments(Name)) {
      visitRuntimeCheck(CS, PoolArgc);
      return;
    } else if (Name == "pthread_create") {
      thread_creation_point = true;

      //
      // Get the DSNode representing the function pointer Value of the
      // pthread_create call.
      //
      DSNode* thread_callee_node =
        G->getNodeForValue(CS.getArgument(2)).getNode();
      if (!thread_callee_node) {
        assert(0 && "apparently you need this code");
        FuncInfo *CFI = PAInfo.getFuncInfo(*CF);
        thread_callee_node =
          G->getNodeForValue(CFI->MapValueToOriginal(CS.getArgument(2))).getNode();
      }

      // Fill in CF with one of the functions in thread_callee_node.
      CF = const_cast<Function*>(
        dyn_cast<Function>(*thread_callee_node->globals_begin()));
    }
  }

  //
  // We need to figure out which local pool descriptors correspond to the pool
  // descriptor arguments passed into the function call. Calculate a mapping
  // from callee DSNodes to caller DSNodes. We construct a partial isomorphism
  // between the graphs to figure out which pool descriptors need to be passed
  // in. The roots of this mapping are found from the arguments and return
  // values.
  //
  DataStructures& Graphs = PAInfo.getGraphs();
  DSGraph::NodeMapTy NodeMapping;
  Instruction *NewCall;
  Value *NewCallee;
  std::vector<const DSNode*> ArgNodes;
  DSGraph *CalleeGraph;  // The callee graph

  // For indirect callees, find any callee since all DS graphs have been
  // merged.
  if (CF) {   // Direct calls are nice and simple.
    DEBUG(errs() << "  Handling direct call: " << *TheCall << "\n");

    //
    // Do not try to add pool handles to the function if it:
    //  a) Already calls a cloned function; or
    //  b) Calls a function which was never cloned.
    //
    // For such a call, just replace any arguments that take original functions
    // with their cloned function pointer values.
    //
    FuncInfo *CFI = PAInfo.getFuncInfo(*CF);
    if (CFI == 0 || CFI->Clone == 0) {   // Nothing to transform...
      visitInstruction(*TheCall);
      return;
    }

    //
    // Oh, dear. We must add pool descriptors to this direct call.
    //
    NewCallee = CFI->Clone;
    ArgNodes = CFI->ArgNodes;

    assert ((Graphs.hasDSGraph (*CF)) && "Function has no ECGraph!\n");
    CalleeGraph = Graphs.getDSGraph(*CF);
  } else {
    DEBUG(errs() << "  Handling indirect call: " << *TheCall << "\n");
    DSGraph *G = Graphs.getGlobalsGraph();
    DSGraph::ScalarMapTy& SM = G->getScalarMap();

    // Here we fill in CF with one of the possible called functions. Because
    // we merged together all of the arguments to all of the functions in the
    // equivalence set, it doesn't really matter which one we pick.
    // (If the function was cloned, we have to map the cloned call instruction
    // in CS back to the original call instruction.)
    Instruction *OrigInst =
      cast<Instruction>(getOldValueIfAvailable(CS.getInstruction()));

    //
    // Attempt to get one of the function targets of this indirect call site by
    // looking at the call graph constructed by the points-to analysis. Be
    // sure to use the original call site from the original function; the
    // points-to analysis has no information on the clones we've created.
    //
    // Also, look for the target that has the greatest number of arguments that
    // have associated DSNodes. This ensures that we pass the maximum number
    // of pools possible and prevents us from eliding a pool because we're
    // examining a target that doesn't need it.
    //
    const DSCallGraph & callGraph = Graphs.getCallGraph();

    DSCallGraph::callee_iterator I = callGraph.callee_begin(OrigInst);
    for (; I != callGraph.callee_end(OrigInst); ++I) {
      for (DSCallGraph::scc_iterator sccii = callGraph.scc_begin(*I),
           sccee = callGraph.scc_end(*I); sccii != sccee; ++sccii) {
        if (SM.find(SM.getLeaderForGlobal(*sccii)) == SM.end()) continue;
        //
        // Get the information for this function. Since this is coming from
        // DSA, it should be an original function.
        //

        // This call site calls a function that is not defined in this module.
        if (!(Graphs.hasDSGraph(**sccii))) return;

        // For all other cases Func Info must exist.
        PAInfo.getFuncInfo(**sccii);

        //
        // If this target takes more DSNodes than the last one we found, then
        // make *this* target our canonical target.
        //
        CF = *sccii;
        break;
      }
    }

    if (!CF) {
      const Function *F1 = OrigInst->getParent()->getParent();
      F1 = callGraph.sccLeader(&*F1);

      for (DSCallGraph::scc_iterator sccii = callGraph.scc_begin(F1),
           sccee = callGraph.scc_end(F1); sccii != sccee; ++sccii) {
        if (SM.find(SM.getLeaderForGlobal(*sccii)) == SM.end()) continue;
        //
        // Get the information for this function. Since this is coming from
        // DSA, it should be an original function.
        //

        // This call site calls a function that is not defined in this module.
        if (!(Graphs.hasDSGraph(**sccii))) return;

        // For all other cases Func Info must exist.
        PAInfo.getFuncInfo(**sccii);

        //
        // If this target takes more DSNodes than the last one we found, then
        // make *this* target our canonical target.
        //
        CF = *sccii;
      }
    }

    // We assume the call graph is always correct; if it reports no callees,
    // we take that to be right as well.
    //
    // If we didn't find the callee in the constructed call graph, try
    // checking in the DSNode itself.
    // This isn't ideal, as it means that this call site didn't have inlining
    // happen.
    //

    //
    // If we still haven't been able to find a target function of the call site
    // to transform, do nothing.
    //
    // One may be tempted to think that we should always have at least one
    // target, but this is not true. There are perfectly acceptable (but
    // strange) programs for which no function targets exist. Function
    // pointers loaded from undef values, for example, will have no targets.
    //
    if (!CF) return;

    //
    // It's possible that this program has indirect call targets that are
    // not defined in this module. Do not perform the transformation for such
    // functions.
    //
    if (!(Graphs.hasDSGraph(*CF))) return;

    //
    // Get the common graph for the set of functions this call may invoke.
    //
    assert ((Graphs.hasDSGraph(*CF)) && "Function has no DSGraph!\n");
    CalleeGraph = Graphs.getDSGraph(*CF);

#ifndef NDEBUG
    // Verify that all potential callees at the call site have the same DS
    // graph.
    DSCallGraph::callee_iterator E = Graphs.getCallGraph().callee_end(OrigInst);
    for (; I != E; ++I) {
      const Function * F = *I;
      assert (F);
      if (!(F)->isDeclaration())
        assert(CalleeGraph == Graphs.getDSGraph(**I) &&
               "Callees at call site do not have a common graph!");
    }
#endif

    // Find the DS nodes for the arguments that need to be added, if any.
    FuncInfo *CFI = PAInfo.getFuncInfo(*CF);
    assert(CFI && "No function info for callee at indirect call?");
    ArgNodes = CFI->ArgNodes;

    if (ArgNodes.empty())
      return;   // No arguments to add? Transformation is a noop!

    // Cast the function pointer to an appropriate type!
    std::vector<Type*> ArgTys(ArgNodes.size(), PoolAllocate::PoolDescPtrTy);
    for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
         I != E; ++I)
      ArgTys.push_back((*I)->getType());

    FunctionType *FTy = FunctionType::get(TheCall->getType(), ArgTys, false);
    PointerType *PFTy = PointerType::getUnqual(FTy);

    // If there are any pool arguments cast the func ptr to the right type.
    NewCallee = CastInst::CreatePointerCast(CS.getCalledValue(), PFTy,
                                            "tmp", TheCall);
  }

  //
  // FIXME: Why do we disable strict checking when calling the
  //        DSGraph::computeNodeMapping() method?
  //
  Function::const_arg_iterator FAI = CF->arg_begin(), E = CF->arg_end();
  CallSite::arg_iterator AI = CS.arg_begin() + (thread_creation_point ? 3 : 0);
  CallSite::arg_iterator AE = CS.arg_end();
  for ( ; FAI != E && AI != AE; ++FAI, ++AI)
    if (!isa<Constant>(*AI)) {
      DSGraph::computeNodeMapping(CalleeGraph->getNodeForValue(FAI),
                                  getDSNodeHFor(*AI), NodeMapping, false);
    }

  //assert(AI == AE && "Varargs calls not handled yet!");

  // Map the return value as well...
  if (isa<PointerType>(TheCall->getType()))
    DSGraph::computeNodeMapping(CalleeGraph->getReturnNodeFor(*CF),
                                getDSNodeHFor(TheCall), NodeMapping, false);

  // This code seems redundant (and crashes occasionally). There is no reason
  // to map globals here, since they are not passed as arguments.

  //   // Map the nodes that are pointed to by globals.
  //   DSScalarMap &CalleeSM = CalleeGraph->getScalarMap();
  //   for (DSScalarMap::global_iterator GI = G.getScalarMap().global_begin(),
  //        E = G.getScalarMap().global_end(); GI != E; ++GI)
  //     if (CalleeSM.count(*GI))
  //       DSGraph::computeNodeMapping(CalleeGraph->getNodeForValue(*GI),
  //                                   getDSNodeHFor(*GI),
  //                                   NodeMapping, false);

  //
  // Okay, now that we have established our mapping, we can figure out which
  // pool descriptors to pass in...
  //
  // Note:
  //  There used to be code here that would create a new pool before the
  //  function call and destroy it after the function call. This code would
  //  get triggered if bounds checking was disabled or the DSNode for the
  //  argument was an array value.
  //
  //  I believe that code was incorrect; an argument may have a NULL pool
  //  handle (i.e., no pool handle) because the pool allocation heuristic
  //  simply decided not to assign that value a pool. The argument may alias
  //  data that should not be freed after the function call is complete, so
  //  calling pooldestroy() after the call would free data, causing dangling
  //  pointer dereference errors.
  //
  std::vector<Value*> Args;
  for (unsigned i = 0, e = ArgNodes.size(); i != e; ++i) {
    Value *ArgVal = Constant::getNullValue(PoolAllocate::PoolDescPtrTy);
    if (NodeMapping.count(ArgNodes[i])) {
      if (DSNode *LocalNode = NodeMapping[ArgNodes[i]].getNode())
        if (FI.PoolDescriptors.count(LocalNode))
          ArgVal = FI.PoolDescriptors.find(LocalNode)->second;
    }
    Args.push_back(ArgVal);
  }

  // Add the rest of the arguments, unless we're at a thread creation point,
  // in which case we only need the pools.
  if (!thread_creation_point)
    Args.insert(Args.end(), CS.arg_begin(), CS.arg_end());

  //
  // There are circumstances where a function is cast to another type and
  // then called (how horrible). We need to perform a similar cast if the
  // type doesn't match the number of arguments.
  //
  if (Function * NewFunction = dyn_cast<Function>(NewCallee)) {
    FunctionType * NewCalleeType = NewFunction->getFunctionType();
    if (NewCalleeType->getNumParams() != Args.size()) {
      std::vector<Type *> Types;
      Type * FuncTy = FunctionType::get (NewCalleeType->getReturnType(),
                                         Types, true);
      FuncTy = PointerType::getUnqual (FuncTy);
      NewCallee = new BitCastInst (NewCallee, FuncTy, "", TheCall);
    }
  }

  std::string Name = TheCall->getName(); TheCall->setName("");

  if (thread_creation_point) {
    Module *M = CS.getInstruction()->getParent()->getParent()->getParent();
    Value* pthread_replacement = M->getFunction("poolalloc_pthread_create");
    std::vector<Value*> thread_args;

    // Push back the original thread arguments through the callee.
    thread_args.push_back(CS.getArgument(0));
    thread_args.push_back(CS.getArgument(1));
    thread_args.push_back(CS.getArgument(2));

    // Push back the integer argument saying how many uses there are.
    thread_args.push_back(Constant::getIntegerValue(
      llvm::Type::getInt32Ty(M->getContext()), APInt(32, Args.size())));
    thread_args.insert(thread_args.end(), Args.begin(), Args.end());
    thread_args.push_back(CS.getArgument(3));

    // Make the thread creation call.
    NewCall = CallInst::Create(pthread_replacement, thread_args,
                               Name, TheCall);
  } else if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
    NewCall = InvokeInst::Create (NewCallee, II->getNormalDest(),
                                  II->getUnwindDest(),
                                  Args, Name, TheCall);
  } else {
    NewCall = CallInst::Create (NewCallee, Args, Name, TheCall);
  }

  // Add all of the uses of the pool descriptor.
  for (unsigned i = 0, e = ArgNodes.size(); i != e; ++i)
    AddPoolUse(*NewCall, Args[i], PoolUses);

  TheCall->replaceAllUsesWith(NewCall);
  DEBUG(errs() << "  Result Call: " << *NewCall << "\n");

  if (!TheCall->getType()->isVoidTy()) {
    // If we are modifying the original function, update the DSGraph...
    DSGraph::ScalarMapTy &SM = G->getScalarMap();
    DSGraph::ScalarMapTy::iterator CII = SM.find(TheCall);
    if (CII != SM.end()) {
      SM[NewCall] = CII->second;
      SM.erase(CII);   // Destroy the CallInst entry.
    } else if (!FI.NewToOldValueMap.empty()) {
      // Otherwise, if this is a clone, update the NewToOldValueMap with the
      // new CI return value.
      UpdateNewToOldValueMap(TheCall, NewCall);
    }
  } else if (!FI.NewToOldValueMap.empty()) {
    UpdateNewToOldValueMap(TheCall, NewCall);
  }

  //
  // Copy over the calling convention of the original call instruction to the
  // new call instruction.
  //
  CallSite(NewCall).setCallingConv(CallSite(TheCall).getCallingConv());

  TheCall->eraseFromParent();
  visitInstruction(*NewCall);
}
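// A schematic sketch (not the pool allocator itself) of how the Args vector
// above is assembled: one pool-descriptor slot per callee ArgNode, with a
// null descriptor standing in for any DSNode the heuristic left unpooled.
// PoolDesc, NullPD, and buildPoolArgs are hypothetical names.
#include <cstdio>
#include <vector>

struct PoolDesc { const char *Name; };
static PoolDesc NullPD{"null pool"};   // stands in for Constant::getNullValue(...)

// Mirror of the Args loop in visitCallSite above: one slot per callee
// ArgNode, falling back to the null descriptor when the caller has no pool
// for the mapped node.
static std::vector<PoolDesc*> buildPoolArgs(const std::vector<PoolDesc*> &Mapped) {
  std::vector<PoolDesc*> Args;
  for (PoolDesc *PD : Mapped)
    Args.push_back(PD ? PD : &NullPD);
  return Args;
}

int main() {
  PoolDesc P1{"PD1"};
  // The callee expects two pool arguments; the second DSNode was never
  // assigned a pool, so it gets the null descriptor.
  for (PoolDesc *PD : buildPoolArgs({&P1, nullptr}))
    std::printf("%s\n", PD->Name);   // prints "PD1", then "null pool"
  return 0;
}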
/// processByValArgument - This is called on every byval argument in call sites.
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
  if (TD == 0) return false;

  // Find out what feeds this byval argument.
  Value *ByValArg = CS.getArgument(ArgNo);
  Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
  uint64_t ByValSize = TD->getTypeAllocSize(ByValTy);
  MemDepResult DepInfo =
    MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
                                 true, CS.getInstruction(),
                                 CS.getInstruction()->getParent());
  if (!DepInfo.isClobber())
    return false;

  // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
  // a memcpy, see if we can byval from the source of the memcpy instead of
  // the result.
  MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
  if (MDep == 0 || MDep->isVolatile() ||
      ByValArg->stripPointerCasts() != MDep->getDest())
    return false;

  // The length of the memcpy must be greater than or equal to the size of the
  // byval.
  ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
  if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
    return false;

  // Get the alignment of the byval. If the call doesn't specify the
  // alignment, then it is some target specific value that we can't know.
  unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
  if (ByValAlign == 0) return false;

  // If the byval alignment is greater than that of the memcpy, check whether
  // we can force the source of the memcpy to the alignment we need. If we
  // fail, we bail out.
  if (MDep->getAlignment() < ByValAlign &&
      getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, TD) < ByValAlign)
    return false;

  // Verify that the copied-from memory doesn't change in between the memcpy
  // and the byval call.
  //    memcpy(a <- b)
  //    *b = 42;
  //    foo(*a)
  // It would be invalid to transform the second memcpy into foo(*b).
  //
  // NOTE: This is conservative, it will stop on any read from the source loc,
  // not just the defining memcpy.
  MemDepResult SourceDep =
    MD->getPointerDependencyFrom(AliasAnalysis::getLocationForSource(MDep),
                                 false, CS.getInstruction(),
                                 MDep->getParent());
  if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
    return false;

  Value *TmpCast = MDep->getSource();
  if (MDep->getSource()->getType() != ByValArg->getType())
    TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
                              "tmpcast", CS.getInstruction());

  DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n"
               << "  " << *MDep << "\n"
               << "  " << *CS.getInstruction() << "\n");

  // Otherwise we're good! Update the byval argument.
  CS.setArgument(ArgNo, TmpCast);
  ++NumMemCpyInstr;
  return true;
}
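// A minimal illustration (plain C++, not a pass) of the safety condition
// checked above: forwarding the byval argument to the memcpy source is only
// valid when nothing writes to the source between the copy and the call.
// The names below are hypothetical.
#include <cstdio>
#include <cstring>

static void foo(const int *A) { std::printf("%d\n", *A); }

int main() {
  int B = 7;
  int Tmp;                        // plays the role of the byval copy
  std::memcpy(&Tmp, &B, sizeof(int));
  foo(&Tmp);                      // prints 7; forwarding foo(&B) is legal here

  std::memcpy(&Tmp, &B, sizeof(int));
  B = 42;                         // source clobbered between memcpy and call:
  foo(&Tmp);                      // prints 7, but foo(&B) would print 42, so
                                  // the transformation must be rejected.
  return 0;
}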
/// SurveyUse - This looks at a single use of an argument or return value
/// and determines if it should be alive or not. Adds this use to MaybeLiveUses
/// if it causes the used value to become MaybeAlive.
///
/// RetValNum is the return value number to use when this use is used in a
/// return instruction. This is used in the recursion; you should always leave
/// it at 0.
DAE::Liveness DAE::SurveyUse(Value::use_iterator U, UseVector &MaybeLiveUses,
                             unsigned RetValNum) {
  Value *V = *U;
  if (ReturnInst *RI = dyn_cast<ReturnInst>(V)) {
    // The value is returned from a function. It's only live when the
    // function's return value is live. We use RetValNum here, for the case
    // that U is really a use of an insertvalue instruction that uses the
    // original Use.
    RetOrArg Use = CreateRet(RI->getParent()->getParent(), RetValNum);
    // We might be live, depending on the liveness of Use.
    return MarkIfNotLive(Use, MaybeLiveUses);
  }
  if (InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
    if (U.getOperandNo() != InsertValueInst::getAggregateOperandIndex()
        && IV->hasIndices())
      // The use we are examining is inserted into an aggregate. Our liveness
      // depends on all uses of that aggregate, but if it is used as a return
      // value, only the index at which we were inserted counts.
      RetValNum = *IV->idx_begin();

    // Note that if we are used as the aggregate operand to the insertvalue,
    // we don't change RetValNum, but do survey all our uses.

    Liveness Result = MaybeLive;
    for (Value::use_iterator I = IV->use_begin(),
         E = IV->use_end(); I != E; ++I) {
      Result = SurveyUse(I, MaybeLiveUses, RetValNum);
      if (Result == Live)
        break;
    }
    return Result;
  }
  CallSite CS = CallSite::get(V);
  if (CS.getInstruction()) {
    Function *F = CS.getCalledFunction();
    if (F) {
      // Used in a direct call.

      // Find the argument number. We know for sure that this use is an
      // argument, since if it were the called function this would be an
      // indirect call, and we know we can't be looking at a value of the
      // label type (for the invoke instruction).
      unsigned ArgNo = CS.getArgumentNo(U.getOperandNo());

      if (ArgNo >= F->getFunctionType()->getNumParams())
        // The value is passed in through a vararg! Must be live.
        return Live;

      assert(CS.getArgument(ArgNo)
             == CS.getInstruction()->getOperand(U.getOperandNo())
             && "Argument is not where we expected it");

      // Value passed to a normal call. It's only live when the corresponding
      // argument to the called function turns out live.
      RetOrArg Use = CreateArg(F, ArgNo);
      return MarkIfNotLive(Use, MaybeLiveUses);
    }
  }
  // Used in any other way? Value must be live.
  return Live;
}
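// A toy model (not DeadArgumentElimination itself) of the MaybeLive scheme
// SurveyUse participates in: a use is either immediately Live, or MaybeLive
// pending the liveness of some other return value or argument, and liveness
// is then propagated with a worklist. All names here are hypothetical.
#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  // "arg" is MaybeLive, contingent on "ret"; "ret" is found to be Live.
  std::map<std::string, std::vector<std::string>> MaybeLiveUses = {
      {"ret", {"arg"}}};
  std::set<std::string> LiveSet = {"ret"};

  // Propagate: when a value is marked live, everything recorded as one of
  // its MaybeLive uses becomes live too (DAE's MarkLive does the analogous
  // work over RetOrArg values).
  std::vector<std::string> Worklist(LiveSet.begin(), LiveSet.end());
  while (!Worklist.empty()) {
    std::string V = Worklist.back();
    Worklist.pop_back();
    for (const std::string &U : MaybeLiveUses[V])
      if (LiveSet.insert(U).second)
        Worklist.push_back(U);
  }

  for (const std::string &V : LiveSet)
    std::printf("live: %s\n", V.c_str());   // prints "live: arg", "live: ret"
  return 0;
}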