/// Infer nonnull attributes for the arguments at the specified callsite.
static bool processCallSite(CallSite CS, LazyValueInfo *LVI) {
  SmallVector<unsigned, 4> Indices;
  unsigned ArgNo = 0;

  for (Value *V : CS.args()) {
    PointerType *Type = dyn_cast<PointerType>(V->getType());
    // Try to mark pointer typed parameters as non-null.  We skip the
    // relatively expensive analysis for constants which are obviously either
    // null or non-null to start with.
    if (Type && !CS.paramHasAttr(ArgNo + 1, Attribute::NonNull) &&
        !isa<Constant>(V) &&
        LVI->getPredicateAt(ICmpInst::ICMP_EQ, V,
                            ConstantPointerNull::get(Type),
                            CS.getInstruction()) == LazyValueInfo::False)
      Indices.push_back(ArgNo + 1);
    ArgNo++;
  }

  assert(ArgNo == CS.arg_size() && "sanity check");

  if (Indices.empty())
    return false;

  AttributeSet AS = CS.getAttributes();
  LLVMContext &Ctx = CS.getInstruction()->getContext();
  AS = AS.addAttribute(Ctx, Indices, Attribute::get(Ctx, Attribute::NonNull));
  CS.setAttributes(AS);

  return true;
}
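// A minimal, hypothetical driver for the routine above (the loop shape and
// helper name are assumptions; the real pass visits blocks in depth-first
// order rather than a flat instruction walk):
static bool processAllCallSites(Function &F, LazyValueInfo *LVI) {
  bool Changed = false;
  for (BasicBlock &BB : F)
    for (Instruction &I : BB)
      if (isa<CallInst>(&I) || isa<InvokeInst>(&I))
        Changed |= processCallSite(CallSite(&I), LVI);
  return Changed;
}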
//
// Method: visitRuntimeCheck()
//
// Description:
//  Visit a call to a run-time check (or related function) and insert pool
//  arguments where needed.  PoolArgc is the number of initial pool arguments
//  that should be filled at the call site with pool handles for the
//  corresponding pointer arguments.
//
void FuncTransform::visitRuntimeCheck (CallSite CS, const unsigned PoolArgc) {
  // A call to the runtime check should have positions for each pool argument
  // and the corresponding pointer.
  assert ((CS.arg_size() >= 2 * PoolArgc) &&
          "Not enough arguments to call of a runtime check!");

  for (unsigned PoolIndex = 0; PoolIndex < PoolArgc; ++PoolIndex) {
    //
    // Get the pool handle for the pointer argument.
    //
    Value *PH =
      getPoolHandle(CS.getArgument(PoolArgc + PoolIndex)->stripPointerCasts());

    //
    // Insert the pool handle into the run-time check.
    //
    if (PH) {
      Type * Int8Type  = Type::getInt8Ty(CS.getInstruction()->getContext());
      Type * VoidPtrTy = PointerType::getUnqual(Int8Type);
      PH = castTo (PH, VoidPtrTy, PH->getName(), CS.getInstruction());
      CS.setArgument (PoolIndex, PH);

      //
      // Record that we've used the pool here.
      //
      AddPoolUse (*(CS.getInstruction()), PH, PoolUses);
    }
  }
}
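// Illustrative call layout only (the check name is an assumption): for a
// run-time check that takes one pool/pointer pair, e.g. poolcheck(PH, ptr),
// PoolArgc == 1; argument 0 receives the pool handle cast to i8*, and the
// pointer whose handle was looked up via getPoolHandle() sits at index
// PoolArgc + 0 == 1.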
/// \brief Try to simplify a call site.
///
/// Takes a concrete function and callsite and tries to actually simplify it by
/// analyzing the arguments and call itself with instsimplify. Returns true if
/// it has simplified the callsite to some other entity (a constant), making it
/// free.
bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) {
  // FIXME: Using the instsimplify logic directly for this is inefficient
  // because we have to continually rebuild the argument list even when no
  // simplifications can be performed. Until that is fixed with remapping
  // inside of instsimplify, directly constant fold calls here.
  if (!canConstantFoldCallTo(F))
    return false;

  // Try to re-map the arguments to constants.
  SmallVector<Constant *, 4> ConstantArgs;
  ConstantArgs.reserve(CS.arg_size());
  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
       I != E; ++I) {
    Constant *C = dyn_cast<Constant>(*I);
    if (!C)
      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(*I));
    if (!C)
      return false; // This argument doesn't map to a constant.

    ConstantArgs.push_back(C);
  }
  if (Constant *C = ConstantFoldCall(F, ConstantArgs)) {
    SimplifiedValues[CS.getInstruction()] = C;
    return true;
  }

  return false;
}
void MemoryInstrumenter::instrumentCallSite(CallSite CS) {
  IDAssigner &IDA = getAnalysis<IDAssigner>();
  unsigned InsID = IDA.getInstructionID(CS.getInstruction());
  assert(InsID != IDAssigner::InvalidID);

  int NumCallingArgs = CS.arg_size();

  vector<Value *> Args;
  Args.push_back(ConstantInt::get(IntType, InsID));
  Args.push_back(ConstantInt::get(IntType, NumCallingArgs));
  CallInst::Create(CallHook, Args, "", CS.getInstruction());
}
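// The hook itself is not shown above; judging only from the Args vector, the
// runtime-side declaration would plausibly take just the two integers (the
// name and linkage below are assumptions, only the parameters are implied):
extern "C" void HookCallSite(unsigned InsID, int NumCallingArgs);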
// Add calls to the registration functions around this call site.
void RegisterVarargCallSites::registerCallSite(Module &M, CallSite &CS) {
  // Insert the registration intrinsics.
  if (registrationFunc == 0 || unregistrationFunc == 0)
    makeRegistrationFunctions(M);

  Instruction *inst = CS.getInstruction();
  LLVMContext &C = M.getContext();
  Type *VoidPtrTy = Type::getInt8PtrTy(C);
  Type *Int32Ty = Type::getInt32Ty(C);

  // Build the argument vector to vacallregister.
  vector<Value *> vaCallRegisterArgs(2);

  Value *dest = CS.getCalledValue();
  Value *destPtr;

  // Get the function pointer casted to i8*.
  if (isa<Constant>(dest))
    destPtr = ConstantExpr::getPointerCast(cast<Constant>(dest), VoidPtrTy);
  else
    destPtr = new BitCastInst(dest, VoidPtrTy, "", inst);

  vaCallRegisterArgs[0] = destPtr;
  vaCallRegisterArgs[1] = ConstantInt::get(Int32Ty, CS.arg_size());

  // Register all the pointer arguments to this function call as well.
  set<Value *> pointerArguments;
  CallSite::arg_iterator arg = CS.arg_begin();
  CallSite::arg_iterator end = CS.arg_end();
  for (; arg != end; ++arg) {
    Value *argval = *arg;
    if (isa<PointerType>(argval->getType())) {
      if (pointerArguments.find(argval) == pointerArguments.end()) {
        pointerArguments.insert(argval);
        vaCallRegisterArgs.push_back(argval);
      }
    }
  }

  // End the argument list with a NULL parameter.
  vaCallRegisterArgs.push_back(
    ConstantPointerNull::get(cast<PointerType>(VoidPtrTy))
  );

  // Add the registration call before the call site.
  CallInst::Create(registrationFunc, vaCallRegisterArgs, "", inst);

  // Add the unregistration call after the call site.
  Instruction *unreg = CallInst::Create(unregistrationFunc);
  unreg->insertAfter(inst);

  return;
}
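// makeRegistrationFunctions() is not shown; judging only from the argument
// vector built above, the registration runtime would plausibly be declared
// along these lines (names and exact signatures are assumptions):
//   void __vacallregister(void *Callee, int32_t ArgCount,
//                         ... /* distinct pointer args, NULL-terminated */);
//   void __vacallunregister(void);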
/// processCallSite - Infer nonnull attributes for the arguments at the
/// specified callsite.
bool CorrelatedValuePropagation::processCallSite(CallSite CS) {
  bool Changed = false;

  unsigned ArgNo = 0;
  for (Value *V : CS.args()) {
    PointerType *Type = dyn_cast<PointerType>(V->getType());

    if (Type && !CS.paramHasAttr(ArgNo + 1, Attribute::NonNull) &&
        LVI->getPredicateAt(ICmpInst::ICMP_EQ, V,
                            ConstantPointerNull::get(Type),
                            CS.getInstruction()) == LazyValueInfo::False) {
      AttributeSet AS = CS.getAttributes();
      AS = AS.addAttribute(CS.getInstruction()->getContext(), ArgNo + 1,
                           Attribute::NonNull);
      CS.setAttributes(AS);
      Changed = true;
    }
    ArgNo++;
  }

  assert(ArgNo == CS.arg_size() && "sanity check");

  return Changed;
}
bool CallAnalyzer::visitCallSite(CallSite CS) {
  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() &&
      !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                      Attribute::ReturnsTwice)) {
    // This aborts the entire analysis.
    ExposesReturnsTwice = true;
    return false;
  }
  if (CS.isCall() &&
      cast<CallInst>(CS.getInstruction())->hasFnAttr(Attribute::NoDuplicate))
    ContainsNoDuplicateCall = true;

  if (Function *F = CS.getCalledFunction()) {
    // When we have a concrete function, first try to simplify it directly.
    if (simplifyCallSite(F, CS))
      return true;

    // Next check if it is an intrinsic we know about.
    // FIXME: Lift this into part of the InstVisitor.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
      switch (II->getIntrinsicID()) {
      default:
        return Base::visitCallSite(CS);

      case Intrinsic::memset:
      case Intrinsic::memcpy:
      case Intrinsic::memmove:
        // SROA can usually chew through these intrinsics, but they aren't free.
        return false;
      }
    }

    if (F == CS.getInstruction()->getParent()->getParent()) {
      // This flag will fully abort the analysis, so don't bother with anything
      // else.
      IsRecursiveCall = true;
      return false;
    }

    if (!callIsSmall(CS)) {
      // We account for the average 1 instruction per call argument setup
      // here.
      Cost += CS.arg_size() * InlineConstants::InstrCost;

      // Everything other than inline ASM will also have a significant cost
      // merely from making the call.
      if (!isa<InlineAsm>(CS.getCalledValue()))
        Cost += InlineConstants::CallPenalty;
    }

    return Base::visitCallSite(CS);
  }

  // Otherwise we're in a very special case -- an indirect function call. See
  // if we can be particularly clever about this.
  Value *Callee = CS.getCalledValue();

  // First, pay the price of the argument setup. We account for the average
  // 1 instruction per call argument setup here.
  Cost += CS.arg_size() * InlineConstants::InstrCost;

  // Next, check if this happens to be an indirect function call to a known
  // function in this inline context. If not, we've done all we can.
  Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
  if (!F)
    return Base::visitCallSite(CS);

  // If we have a constant that we are calling as a function, we can peer
  // through it and see the function target. This happens not infrequently
  // during devirtualization and so we want to give it a hefty bonus for
  // inlining, but cap that bonus in the event that inlining wouldn't pan
  // out. Pretend to inline the function, with a custom threshold.
  CallAnalyzer CA(TD, *F, InlineConstants::IndirectCallThreshold);
  if (CA.analyzeCall(CS)) {
    // We were able to inline the indirect call! Subtract the cost from the
    // bonus we want to apply, but don't go below zero.
    Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());
  }

  return Base::visitCallSite(CS);
}
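// Worked example for the indirect-call path above (constants assumed for
// illustration): a 4-argument indirect call first pays
//   4 * InlineConstants::InstrCost = 4 * 5 = 20
// toward Cost; if SimplifiedValues resolves the callee and the nested
// CallAnalyzer inlines it with a cost of 60 against a threshold of 100, the
// bonus applied is max(0, 100 - 60) = 40 subtracted from Cost.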
// InlineFunction - This function inlines the called function into the basic // block of the caller. This returns false if it is not possible to inline this // call. The program is still in a well defined state if this occurs though. // // Note that this only does one level of inlining. For example, if the // instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now // exists in the instruction stream. Similiarly this will inline a recursive // function by one level. // bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) { Instruction *TheCall = CS.getInstruction(); assert(TheCall->getParent() && TheCall->getParent()->getParent() && "Instruction not in function!"); const Function *CalledFunc = CS.getCalledFunction(); if (CalledFunc == 0 || // Can't inline external function or indirect CalledFunc->isDeclaration() || // call, or call to a vararg function! CalledFunc->getFunctionType()->isVarArg()) return false; // If the call to the callee is not a tail call, we must clear the 'tail' // flags on any calls that we inline. bool MustClearTailCallFlags = !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall()); // If the call to the callee cannot throw, set the 'nounwind' flag on any // calls that we inline. bool MarkNoUnwind = CS.doesNotThrow(); BasicBlock *OrigBB = TheCall->getParent(); Function *Caller = OrigBB->getParent(); // GC poses two hazards to inlining, which only occur when the callee has GC: // 1. If the caller has no GC, then the callee's GC must be propagated to the // caller. // 2. If the caller has a differing GC, it is invalid to inline. if (CalledFunc->hasGC()) { if (!Caller->hasGC()) Caller->setGC(CalledFunc->getGC()); else if (CalledFunc->getGC() != Caller->getGC()) return false; } // Get an iterator to the last basic block in the function, which will have // the new function inlined after it. // Function::iterator LastBlock = &Caller->back(); // Make sure to capture all of the return instructions from the cloned // function. std::vector<ReturnInst*> Returns; ClonedCodeInfo InlinedFunctionInfo; Function::iterator FirstNewBlock; { // Scope to destroy ValueMap after cloning. DenseMap<const Value*, Value*> ValueMap; assert(CalledFunc->arg_size() == CS.arg_size() && "No varargs calls can be inlined!"); // Calculate the vector of arguments to pass into the function cloner, which // matches up the formal to the actual argument values. CallSite::arg_iterator AI = CS.arg_begin(); unsigned ArgNo = 0; for (Function::const_arg_iterator I = CalledFunc->arg_begin(), E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { Value *ActualArg = *AI; // When byval arguments actually inlined, we need to make the copy implied // by them explicit. However, we don't do this if the callee is readonly // or readnone, because the copy would be unneeded: the callee doesn't // modify the struct. if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) && !CalledFunc->onlyReadsMemory()) { const Type *AggTy = cast<PointerType>(I->getType())->getElementType(); const Type *VoidPtrTy = PointerType::getUnqual(Type::Int8Ty); // Create the alloca. If we have TargetData, use nice alignment. unsigned Align = 1; if (TD) Align = TD->getPrefTypeAlignment(AggTy); Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(), Caller->begin()->begin()); // Emit a memcpy. 
const Type *Tys[] = { Type::Int64Ty }; Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(), Intrinsic::memcpy, Tys, 1); Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall); Value *Size; if (TD == 0) Size = ConstantExpr::getSizeOf(AggTy); else Size = ConstantInt::get(Type::Int64Ty, TD->getTypeStoreSize(AggTy)); // Always generate a memcpy of alignment 1 here because we don't know // the alignment of the src pointer. Other optimizations can infer // better alignment. Value *CallArgs[] = { DestCast, SrcCast, Size, ConstantInt::get(Type::Int32Ty, 1) }; CallInst *TheMemCpy = CallInst::Create(MemCpyFn, CallArgs, CallArgs+4, "", TheCall); // If we have a call graph, update it. if (CG) { CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn); CallGraphNode *CallerNode = (*CG)[Caller]; CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN); } // Uses of the argument in the function should use our new alloca // instead. ActualArg = NewAlloca; } ValueMap[I] = ActualArg; } // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i", &InlinedFunctionInfo, TD); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; // Update the callgraph if requested. if (CG) UpdateCallGraphAfterInlining(CS, FirstNewBlock, ValueMap, *CG); } // If there are any alloca instructions in the block that used to be the entry // block for the callee, move them to the entry block of the caller. First // calculate which instruction they should be inserted before. We insert the // instructions at the end of the current alloca list. // { BasicBlock::iterator InsertPoint = Caller->begin()->begin(); for (BasicBlock::iterator I = FirstNewBlock->begin(), E = FirstNewBlock->end(); I != E; ) if (AllocaInst *AI = dyn_cast<AllocaInst>(I++)) { // If the alloca is now dead, remove it. This often occurs due to code // specialization. if (AI->use_empty()) { AI->eraseFromParent(); continue; } if (isa<Constant>(AI->getArraySize())) { // Scan for the block of allocas that we can move over, and move them // all at once. while (isa<AllocaInst>(I) && isa<Constant>(cast<AllocaInst>(I)->getArraySize())) ++I; // Transfer all of the allocas over in a block. Using splice means // that the instructions aren't removed from the symbol table, then // reinserted. Caller->getEntryBlock().getInstList().splice( InsertPoint, FirstNewBlock->getInstList(), AI, I); } } } // If the inlined code contained dynamic alloca instructions, wrap the inlined // code with llvm.stacksave/llvm.stackrestore intrinsics. if (InlinedFunctionInfo.ContainsDynamicAllocas) { Module *M = Caller->getParent(); // Get the two intrinsics we care about. Constant *StackSave, *StackRestore; StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); StackRestore = Intrinsic::getDeclaration(M, Intrinsic::stackrestore); // If we are preserving the callgraph, add edges to the stacksave/restore // functions for the calls we insert. CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0; if (CG) { // We know that StackSave/StackRestore are Function*'s, because they are // intrinsics which must have the right types. 
StackSaveCGN = CG->getOrInsertFunction(cast<Function>(StackSave)); StackRestoreCGN = CG->getOrInsertFunction(cast<Function>(StackRestore)); CallerNode = (*CG)[Caller]; } // Insert the llvm.stacksave. CallInst *SavedPtr = CallInst::Create(StackSave, "savedstack", FirstNewBlock->begin()); if (CG) CallerNode->addCalledFunction(SavedPtr, StackSaveCGN); // Insert a call to llvm.stackrestore before any return instructions in the // inlined function. for (unsigned i = 0, e = Returns.size(); i != e; ++i) { CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", Returns[i]); if (CG) CallerNode->addCalledFunction(CI, StackRestoreCGN); } // Count the number of StackRestore calls we insert. unsigned NumStackRestores = Returns.size(); // If we are inlining an invoke instruction, insert restores before each // unwind. These unwinds will be rewritten into branches later. if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) { for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { CallInst::Create(StackRestore, SavedPtr, "", UI); ++NumStackRestores; } } } // If we are inlining tail call instruction through a call site that isn't // marked 'tail', we must remove the tail marker for any calls in the inlined // code. Also, calls inlined through a 'nounwind' call site should be marked // 'nounwind'. if (InlinedFunctionInfo.ContainsCalls && (MustClearTailCallFlags || MarkNoUnwind)) { for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (CallInst *CI = dyn_cast<CallInst>(I)) { if (MustClearTailCallFlags) CI->setTailCall(false); if (MarkNoUnwind) CI->setDoesNotThrow(); } } // If we are inlining through a 'nounwind' call site then any inlined 'unwind' // instructions are unreachable. if (InlinedFunctionInfo.ContainsUnwinds && MarkNoUnwind) for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) { TerminatorInst *Term = BB->getTerminator(); if (isa<UnwindInst>(Term)) { new UnreachableInst(Term); BB->getInstList().erase(Term); } } // If we are inlining for an invoke instruction, we must make sure to rewrite // any inlined 'unwind' instructions into branches to the invoke exception // destination, and call instructions into invoke instructions. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo); // If we cloned in _exactly one_ basic block, and if that block ends in a // return instruction, we splice the body of the inlined callee directly into // the calling basic block. if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { // Move all of the instructions right before the call. OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(), FirstNewBlock->begin(), FirstNewBlock->end()); // Remove the cloned basic block. Caller->getBasicBlockList().pop_back(); // If the call site was an invoke instruction, add a branch to the normal // destination. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) BranchInst::Create(II->getNormalDest(), TheCall); // If the return instruction returned a value, replace uses of the call with // uses of the returned value. if (!TheCall->use_empty()) { ReturnInst *R = Returns[0]; TheCall->replaceAllUsesWith(R->getReturnValue()); } // Since we are now done with the Call/Invoke, we can delete it. 
TheCall->eraseFromParent(); // Since we are now done with the return instruction, delete it also. Returns[0]->eraseFromParent(); // We are now done with the inlining. return true; } // Otherwise, we have the normal case, of more than one block to inline or // multiple return sites. // We want to clone the entire callee function into the hole between the // "starter" and "ender" blocks. How we accomplish this depends on whether // this is an invoke instruction or a call instruction. BasicBlock *AfterCallBB; if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { // Add an unconditional branch to make this look like the CallInst case... BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); // Split the basic block. This guarantees that no PHI nodes will have to be // updated due to new incoming edges, and make the invoke case more // symmetric to the call case. AfterCallBB = OrigBB->splitBasicBlock(NewBr, CalledFunc->getName()+".exit"); } else { // It's a call // If this is a call instruction, we need to split the basic block that // the call lives in. // AfterCallBB = OrigBB->splitBasicBlock(TheCall, CalledFunc->getName()+".exit"); } // Change the branch that used to go to AfterCallBB to branch to the first // basic block of the inlined function. // TerminatorInst *Br = OrigBB->getTerminator(); assert(Br && Br->getOpcode() == Instruction::Br && "splitBasicBlock broken!"); Br->setOperand(0, FirstNewBlock); // Now that the function is correct, make it a little bit nicer. In // particular, move the basic blocks inserted from the end of the function // into the space made by splitting the source basic block. Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(), FirstNewBlock, Caller->end()); // Handle all of the return instructions that we just cloned in, and eliminate // any users of the original call/invoke instruction. const Type *RTy = CalledFunc->getReturnType(); if (Returns.size() > 1) { // The PHI node should go at the front of the new basic block to merge all // possible incoming values. PHINode *PHI = 0; if (!TheCall->use_empty()) { PHI = PHINode::Create(RTy, TheCall->getName(), AfterCallBB->begin()); // Anything that used the result of the function call should now use the // PHI node as their operand. TheCall->replaceAllUsesWith(PHI); } // Loop over all of the return instructions adding entries to the PHI node // as appropriate. if (PHI) { for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; assert(RI->getReturnValue()->getType() == PHI->getType() && "Ret value not consistent in function!"); PHI->addIncoming(RI->getReturnValue(), RI->getParent()); } } // Add a branch to the merge points and remove return instructions. for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; BranchInst::Create(AfterCallBB, RI); RI->eraseFromParent(); } } else if (!Returns.empty()) { // Otherwise, if there is exactly one return value, just replace anything // using the return value of the call with the computed value. if (!TheCall->use_empty()) TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); // Splice the code from the return block into the block that it will return // to, which contains the code that was after the call. BasicBlock *ReturnBB = Returns[0]->getParent(); AfterCallBB->getInstList().splice(AfterCallBB->begin(), ReturnBB->getInstList()); // Update PHI nodes that use the ReturnBB to use the AfterCallBB. 
ReturnBB->replaceAllUsesWith(AfterCallBB); // Delete the return instruction now and empty ReturnBB now. Returns[0]->eraseFromParent(); ReturnBB->eraseFromParent(); } else if (!TheCall->use_empty()) { // No returns, but something is using the return value of the call. Just // nuke the result. TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); } // Since we are now done with the Call/Invoke, we can delete it. TheCall->eraseFromParent(); // We should always be able to fold the entry block of the function into the // single predecessor of the block... assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0); // Splice the code entry block into calling block, right before the // unconditional branch. OrigBB->getInstList().splice(Br, CalleeEntry->getInstList()); CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes // Remove the unconditional branch. OrigBB->getInstList().erase(Br); // Now we can remove the CalleeEntry block, which is now empty. Caller->getBasicBlockList().erase(CalleeEntry); return true; }
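// Hypothetical usage sketch for the overload above (surrounding names are
// illustrative; the call-graph and target-data pointers may be null):
//   CallSite CS(SomeCallOrInvoke);
//   if (InlineFunction(CS, /*CG=*/0, /*TD=*/0))
//     ++NumInlined;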
static bool tryToSplitCallSite(CallSite CS, TargetTransformInfo &TTI) {
  if (!CS.arg_size() || !canSplitCallSite(CS, TTI))
    return false;
  return tryToSplitOnPredicatedArgument(CS) ||
         tryToSplitOnPHIPredicatedArgument(CS);
}
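// Illustrative only: call-site splitting duplicates the call into
// predecessors where an argument is "predicated", e.g. known non-null along
// one incoming edge or defined by a PHI of distinct constants, so each copy
// of the call can be optimized with the edge-specific fact. The exact IR
// produced depends on tryToSplitOnPredicatedArgument and
// tryToSplitOnPHIPredicatedArgument, which are not shown here.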
bool CSDataRando::processCallSite(CallSite CS, FuncInfo &FI, PointerEquivalenceAnalysis &P, DSGraph *G) { bool IndirectCall = !isa<Function>(CS.getCalledValue()->stripPointerCasts()); if (IndirectCall) { NumIndirectCalls++; } CallSite OriginalCS = originalCallSite(FI, CS); if (!DSA->canEncryptCall(OriginalCS)) { if (IndirectCall) { NumIndirectCantEncrypt++; } return false; } DSCallSite DSCS = G->getDSCallSiteForCallSite(OriginalCS); const Function *Callee = getEffectiveCallee(DSCS, FI, G); if (!Callee) { if (IndirectCall) { NumIndirectCantEncrypt++; } return false; } FuncInfo &CalleeInfo = FunctionInfo[Callee]; Value *Clone = getCloneCalledValue(CS, CalleeInfo); if (!Clone || CalleeInfo.ArgNodes.empty()) { if (IndirectCall) { NumIndirectCantEncrypt++; } return false; } // We create a mapping of the formal argument nodes in the callee function and // actual argument nodes in the caller function's graph. DSGraph::NodeMapTy NodeMap; DSGraph *CalleeG = DSA->getDSGraph(*Callee); // getArgNodesForCall places the return node and the vanode in the // first two slots of the vector, followed by the nodes for the regular // pointer arguments. std::vector<DSNodeHandle> ArgNodes; getArgNodesForCall(CalleeG, DSCS, ArgNodes); // First the return value DSNodeHandle CalleeRetNode = ArgNodes[0]; DSGraph::computeNodeMapping(CalleeRetNode, DSCS.getRetVal(), NodeMap); // Then VarArgs DSNodeHandle CalleeVarArgNode = ArgNodes[1]; DSGraph::computeNodeMapping(CalleeVarArgNode, DSCS.getVAVal(), NodeMap); // And last the regular arguments. for (unsigned int i = 0; i < DSCS.getNumPtrArgs() && i + 2 < ArgNodes.size(); i++) { DSGraph::computeNodeMapping(ArgNodes[i + 2], DSCS.getPtrArg(i), NodeMap); } // Collect the arguments and masks to pass to call SmallVector<Value*, 8> Args; unsigned int i = 0; for (unsigned int e = CS.getFunctionType()->getNumParams(); i < e; i++) { Args.push_back(CS.getArgOperand(i)); } for (const DSNode *N : CalleeInfo.ArgNodes) { Value *Mask = P.getMaskForNode(NodeMap[N]); Args.push_back(Mask); } // VarArgs go after masks for (unsigned int e = CS.arg_size(); i < e; i++) { Args.push_back(CS.getArgOperand(i)); } // Do replacement Instruction *CI = CS.getInstruction(); Value *Call; if (CS.isCall()) { Call = CallInst::Create(Clone, Args, "", CI); } else { InvokeInst *II = cast<InvokeInst>(CI); Call = InvokeInst::Create(Clone, II->getNormalDest(), II->getUnwindDest(), Args, "", II); } CallSite NewCS(Call); NewCS.setCallingConv(CS.getCallingConv()); CI->replaceAllUsesWith(Call); P.replace(CI, Call); CI->eraseFromParent(); return true; }
void MetadataTransform(CallSite &CS, const TargetData *TD) { Instruction *I = CS.getInstruction(); Module *M = I->getParent()->getParent()->getParent(); LLVMContext &Ctx = I->getContext(); unsigned argIdx = 0; if (CS.arg_size() < 2) report_fatal_error(I, "_SYS_lti_metadata requires at least two args"); // Parse the 'key' parameter ConstantInt *CI = dyn_cast<ConstantInt>(CS.getArgument(argIdx++)); if (!CI) report_fatal_error(I, "Metadata key must be a constant integer."); uint16_t key = CI->getZExtValue(); if (key != CI->getZExtValue()) report_fatal_error(I, "Metadata key argument is too large."); // Parse the 'fmt' parameter std::string fmt; if (!GetConstantStringInfo(CS.getArgument(argIdx++), fmt)) report_fatal_error(I, "Metadata format must be a constant string."); if (fmt.size() != CS.arg_size() - 2) report_fatal_error(I, "Length of metadata format must match number of parameters"); if (fmt.size() == 0) report_fatal_error(I, "Empty metadata values are not supported"); /* * Parse every other parameter according to the format string */ SmallVector<Constant*, 8> Members; unsigned align = 1; for (std::string::iterator FI = fmt.begin(), FE = fmt.end(); FI != FE; ++FI, argIdx++) { Constant *Arg = dyn_cast<Constant>(CS.getArgument(argIdx)); Constant *C; if (!Arg) report_fatal_error(I, "Metadata argument " + Twine(argIdx+1) + " is not constant"); /* * First, non-integer types */ switch (*FI) { case 's': { std::string str; if (!GetConstantStringInfo(Arg, str)) report_fatal_error(I, "Metadata formatter 's' requires a constant string"); Members.push_back(ConstantArray::get(Ctx, str, false)); continue; } } /* * Integer types */ if (Arg->getType()->isPointerTy()) Arg = ConstantExpr::getPointerCast(Arg, Type::getInt32Ty(Ctx)); if (!Arg->getType()->isIntegerTy()) report_fatal_error(I, "Metadata argument " + Twine(argIdx+1) + " can't be converted to an integer type."); switch (*FI) { case 'b': C = ConstantExpr::getIntegerCast(Arg, Type::getInt8Ty(Ctx), true); break; case 'B': C = ConstantExpr::getIntegerCast(Arg, Type::getInt8Ty(Ctx), false); break; case 'h': C = ConstantExpr::getIntegerCast(Arg, Type::getInt16Ty(Ctx), true); break; case 'H': C = ConstantExpr::getIntegerCast(Arg, Type::getInt16Ty(Ctx), false); break; case 'i': C = ConstantExpr::getIntegerCast(Arg, Type::getInt32Ty(Ctx), true); break; case 'I': C = ConstantExpr::getIntegerCast(Arg, Type::getInt32Ty(Ctx), false); break; default: report_fatal_error(I, "Unsupported format character '" + Twine(*FI) + "' in metadata"); } align = std::max(align, TD->getABITypeAlignment(C->getType())); Members.push_back(C); } Constant *Struct = ConstantStruct::getAnon(Members); /* * Install this metadata item as a global variable in a special section */ GlobalVariable *GV = new GlobalVariable(*M, Struct->getType(), true, GlobalValue::ExternalLinkage, Struct, "", 0, false); GV->setAlignment(align); GV->setName(SVMDecorations::META + Twine(key) + SVMDecorations::SEPARATOR); GV->setSection(".metadata"); // Remove the original _SYS_lti_metadata() call I->eraseFromParent(); }
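// Illustrative call shape only (key and values assumed): a source-level call
// such as
//   _SYS_lti_metadata(key, "sIh", "hello", someU32, someU16);
// has three format characters and three value arguments, and would emit an
// anonymous constant struct of { [5 x i8], i32, i16 } into the ".metadata"
// section under a META<key> symbol, as built at the end of the function above.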
void Lint::visitCallSite(CallSite CS) { Instruction &I = *CS.getInstruction(); Value *Callee = CS.getCalledValue(); visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr, MemRef::Callee); if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) { Assert(CS.getCallingConv() == F->getCallingConv(), "Undefined behavior: Caller and callee calling convention differ", &I); FunctionType *FT = F->getFunctionType(); unsigned NumActualArgs = CS.arg_size(); Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs : FT->getNumParams() == NumActualArgs, "Undefined behavior: Call argument count mismatches callee " "argument count", &I); Assert(FT->getReturnType() == I.getType(), "Undefined behavior: Call return type mismatches " "callee return type", &I); // Check argument types (in case the callee was casted) and attributes. // TODO: Verify that caller and callee attributes are compatible. Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end(); CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); for (; AI != AE; ++AI) { Value *Actual = *AI; if (PI != PE) { Argument *Formal = &*PI++; Assert(Formal->getType() == Actual->getType(), "Undefined behavior: Call argument type mismatches " "callee parameter type", &I); // Check that noalias arguments don't alias other arguments. This is // not fully precise because we don't know the sizes of the dereferenced // memory regions. if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) if (AI != BI && (*BI)->getType()->isPointerTy()) { AliasResult Result = AA->alias(*AI, *BI); Assert(Result != MustAlias && Result != PartialAlias, "Unusual: noalias argument aliases another argument", &I); } // Check that an sret argument points to valid memory. if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { Type *Ty = cast<PointerType>(Formal->getType())->getElementType(); visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty), DL->getABITypeAlignment(Ty), Ty, MemRef::Read | MemRef::Write); } } } } if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall()) for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); AI != AE; ++AI) { Value *Obj = findValue(*AI, /*OffsetOk=*/true); Assert(!isa<AllocaInst>(Obj), "Undefined behavior: Call with \"tail\" keyword references " "alloca", &I); } if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) switch (II->getIntrinsicID()) { default: break; // TODO: Check more intrinsics case Intrinsic::memcpy: { MemCpyInst *MCI = cast<MemCpyInst>(&I); // TODO: If the size is known, use it. visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize, MCI->getAlignment(), nullptr, MemRef::Write); visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize, MCI->getAlignment(), nullptr, MemRef::Read); // Check that the memcpy arguments don't overlap. The AliasAnalysis API // isn't expressive enough for what we really want to do. Known partial // overlap is not distinguished from the case where nothing is known. uint64_t Size = 0; if (const ConstantInt *Len = dyn_cast<ConstantInt>(findValue(MCI->getLength(), /*OffsetOk=*/false))) if (Len->getValue().isIntN(32)) Size = Len->getValue().getZExtValue(); Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != MustAlias, "Undefined behavior: memcpy source and destination overlap", &I); break; } case Intrinsic::memmove: { MemMoveInst *MMI = cast<MemMoveInst>(&I); // TODO: If the size is known, use it. 
visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize, MMI->getAlignment(), nullptr, MemRef::Write); visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize, MMI->getAlignment(), nullptr, MemRef::Read); break; } case Intrinsic::memset: { MemSetInst *MSI = cast<MemSetInst>(&I); // TODO: If the size is known, use it. visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize, MSI->getAlignment(), nullptr, MemRef::Write); break; } case Intrinsic::vastart: Assert(I.getParent()->getParent()->isVarArg(), "Undefined behavior: va_start called in a non-varargs function", &I); visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0, nullptr, MemRef::Read | MemRef::Write); break; case Intrinsic::vacopy: visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0, nullptr, MemRef::Write); visitMemoryReference(I, CS.getArgument(1), MemoryLocation::UnknownSize, 0, nullptr, MemRef::Read); break; case Intrinsic::vaend: visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0, nullptr, MemRef::Read | MemRef::Write); break; case Intrinsic::stackrestore: // Stackrestore doesn't read or write memory, but it sets the // stack pointer, which the compiler may read from or write to // at any time, so check it for both readability and writeability. visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0, nullptr, MemRef::Read | MemRef::Write); break; } }
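// Illustrative only: the calling-convention assertion above would flag, e.g.,
//   declare fastcc void @f()
//   ...
//   call void @f()   ; caller uses the default C calling convention
// with "Undefined behavior: Caller and callee calling convention differ".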
/// InlineFunction - This function inlines the called function into the basic /// block of the caller. This returns false if it is not possible to inline /// this call. The program is still in a well defined state if this occurs /// though. /// /// Note that this only does one level of inlining. For example, if the /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now /// exists in the instruction stream. Similarly this will inline a recursive /// function by one level. bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, bool InsertLifetime) { Instruction *TheCall = CS.getInstruction(); assert(TheCall->getParent() && TheCall->getParent()->getParent() && "Instruction not in function!"); // If IFI has any state in it, zap it before we fill it in. IFI.reset(); const Function *CalledFunc = CS.getCalledFunction(); if (CalledFunc == 0 || // Can't inline external function or indirect CalledFunc->isDeclaration() || // call, or call to a vararg function! CalledFunc->getFunctionType()->isVarArg()) return false; // If the call to the callee is not a tail call, we must clear the 'tail' // flags on any calls that we inline. bool MustClearTailCallFlags = !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall()); // If the call to the callee cannot throw, set the 'nounwind' flag on any // calls that we inline. bool MarkNoUnwind = CS.doesNotThrow(); BasicBlock *OrigBB = TheCall->getParent(); Function *Caller = OrigBB->getParent(); // GC poses two hazards to inlining, which only occur when the callee has GC: // 1. If the caller has no GC, then the callee's GC must be propagated to the // caller. // 2. If the caller has a differing GC, it is invalid to inline. if (CalledFunc->hasGC()) { if (!Caller->hasGC()) Caller->setGC(CalledFunc->getGC()); else if (CalledFunc->getGC() != Caller->getGC()) return false; } // Get the personality function from the callee if it contains a landing pad. Value *CalleePersonality = 0; for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I) if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { const BasicBlock *BB = II->getUnwindDest(); const LandingPadInst *LP = BB->getLandingPadInst(); CalleePersonality = LP->getPersonalityFn(); break; } // Find the personality function used by the landing pads of the caller. If it // exists, then check to see that it matches the personality function used in // the callee. if (CalleePersonality) { for (Function::const_iterator I = Caller->begin(), E = Caller->end(); I != E; ++I) if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { const BasicBlock *BB = II->getUnwindDest(); const LandingPadInst *LP = BB->getLandingPadInst(); // If the personality functions match, then we can perform the // inlining. Otherwise, we can't inline. // TODO: This isn't 100% true. Some personality functions are proper // supersets of others and can be used in place of the other. if (LP->getPersonalityFn() != CalleePersonality) return false; break; } } // Get an iterator to the last basic block in the function, which will have // the new function inlined after it. Function::iterator LastBlock = &Caller->back(); // Make sure to capture all of the return instructions from the cloned // function. SmallVector<ReturnInst*, 8> Returns; ClonedCodeInfo InlinedFunctionInfo; Function::iterator FirstNewBlock; { // Scope to destroy VMap after cloning. 
ValueToValueMapTy VMap; assert(CalledFunc->arg_size() == CS.arg_size() && "No varargs calls can be inlined!"); // Calculate the vector of arguments to pass into the function cloner, which // matches up the formal to the actual argument values. CallSite::arg_iterator AI = CS.arg_begin(); unsigned ArgNo = 0; for (Function::const_arg_iterator I = CalledFunc->arg_begin(), E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { Value *ActualArg = *AI; const Argument *Arg = I; // When byval arguments actually inlined, we need to make the copy implied // by them explicit. However, we don't do this if the callee is readonly // or readnone, because the copy would be unneeded: the callee doesn't // modify the struct. if (CS.isByValArgument(ArgNo)) { ActualArg = HandleByValArgument(ActualArg, Arg, TheCall, CalledFunc, IFI, CalledFunc->getParamAlignment(ArgNo+1)); // Calls that we inline may use the new alloca, so we need to clear // their 'tail' flags if HandleByValArgument introduced a new alloca and // the callee has calls. MustClearTailCallFlags |= ActualArg != *AI; } VMap[I] = ActualArg; } // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", &InlinedFunctionInfo, IFI.TD, TheCall); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; // Update the callgraph if requested. if (IFI.CG) UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); // Update inlined instructions' line number information. fixupLineNumbers(Caller, FirstNewBlock, TheCall); } // If there are any alloca instructions in the block that used to be the entry // block for the callee, move them to the entry block of the caller. First // calculate which instruction they should be inserted before. We insert the // instructions at the end of the current alloca list. { BasicBlock::iterator InsertPoint = Caller->begin()->begin(); for (BasicBlock::iterator I = FirstNewBlock->begin(), E = FirstNewBlock->end(); I != E; ) { AllocaInst *AI = dyn_cast<AllocaInst>(I++); if (AI == 0) continue; // If the alloca is now dead, remove it. This often occurs due to code // specialization. if (AI->use_empty()) { AI->eraseFromParent(); continue; } if (!isa<Constant>(AI->getArraySize())) continue; // Keep track of the static allocas that we inline into the caller. IFI.StaticAllocas.push_back(AI); // Scan for the block of allocas that we can move over, and move them // all at once. while (isa<AllocaInst>(I) && isa<Constant>(cast<AllocaInst>(I)->getArraySize())) { IFI.StaticAllocas.push_back(cast<AllocaInst>(I)); ++I; } // Transfer all of the allocas over in a block. Using splice means // that the instructions aren't removed from the symbol table, then // reinserted. Caller->getEntryBlock().getInstList().splice(InsertPoint, FirstNewBlock->getInstList(), AI, I); } } // Leave lifetime markers for the static alloca's, scoping them to the // function we just inlined. if (InsertLifetime && !IFI.StaticAllocas.empty()) { IRBuilder<> builder(FirstNewBlock->begin()); for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { AllocaInst *AI = IFI.StaticAllocas[ai]; // If the alloca is already scoped to something smaller than the whole // function then there's no need to add redundant, less accurate markers. 
if (hasLifetimeMarkers(AI)) continue; // Try to determine the size of the allocation. ConstantInt *AllocaSize = 0; if (ConstantInt *AIArraySize = dyn_cast<ConstantInt>(AI->getArraySize())) { if (IFI.TD) { Type *AllocaType = AI->getAllocatedType(); uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType); uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); assert(AllocaArraySize > 0 && "array size of AllocaInst is zero"); // Check that array size doesn't saturate uint64_t and doesn't // overflow when it's multiplied by type size. if (AllocaArraySize != ~0ULL && UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), AllocaArraySize * AllocaTypeSize); } } } builder.CreateLifetimeStart(AI, AllocaSize); for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) { IRBuilder<> builder(Returns[ri]); builder.CreateLifetimeEnd(AI, AllocaSize); } } } // If the inlined code contained dynamic alloca instructions, wrap the inlined // code with llvm.stacksave/llvm.stackrestore intrinsics. if (InlinedFunctionInfo.ContainsDynamicAllocas) { Module *M = Caller->getParent(); // Get the two intrinsics we care about. Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); // Insert the llvm.stacksave. CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin()) .CreateCall(StackSave, "savedstack"); // Insert a call to llvm.stackrestore before any return instructions in the // inlined function. for (unsigned i = 0, e = Returns.size(); i != e; ++i) { IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr); } } // If we are inlining tail call instruction through a call site that isn't // marked 'tail', we must remove the tail marker for any calls in the inlined // code. Also, calls inlined through a 'nounwind' call site should be marked // 'nounwind'. if (InlinedFunctionInfo.ContainsCalls && (MustClearTailCallFlags || MarkNoUnwind)) { for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (CallInst *CI = dyn_cast<CallInst>(I)) { if (MustClearTailCallFlags) CI->setTailCall(false); if (MarkNoUnwind) CI->setDoesNotThrow(); } } // If we are inlining for an invoke instruction, we must make sure to rewrite // any call instructions into invoke instructions. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo); // If we cloned in _exactly one_ basic block, and if that block ends in a // return instruction, we splice the body of the inlined callee directly into // the calling basic block. if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { // Move all of the instructions right before the call. OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(), FirstNewBlock->begin(), FirstNewBlock->end()); // Remove the cloned basic block. Caller->getBasicBlockList().pop_back(); // If the call site was an invoke instruction, add a branch to the normal // destination. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); NewBr->setDebugLoc(Returns[0]->getDebugLoc()); } // If the return instruction returned a value, replace uses of the call with // uses of the returned value. 
if (!TheCall->use_empty()) { ReturnInst *R = Returns[0]; if (TheCall == R->getReturnValue()) TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); else TheCall->replaceAllUsesWith(R->getReturnValue()); } // Since we are now done with the Call/Invoke, we can delete it. TheCall->eraseFromParent(); // Since we are now done with the return instruction, delete it also. Returns[0]->eraseFromParent(); // We are now done with the inlining. return true; } // Otherwise, we have the normal case, of more than one block to inline or // multiple return sites. // We want to clone the entire callee function into the hole between the // "starter" and "ender" blocks. How we accomplish this depends on whether // this is an invoke instruction or a call instruction. BasicBlock *AfterCallBB; BranchInst *CreatedBranchToNormalDest = NULL; if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { // Add an unconditional branch to make this look like the CallInst case... CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall); // Split the basic block. This guarantees that no PHI nodes will have to be // updated due to new incoming edges, and make the invoke case more // symmetric to the call case. AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest, CalledFunc->getName()+".exit"); } else { // It's a call // If this is a call instruction, we need to split the basic block that // the call lives in. // AfterCallBB = OrigBB->splitBasicBlock(TheCall, CalledFunc->getName()+".exit"); } // Change the branch that used to go to AfterCallBB to branch to the first // basic block of the inlined function. // TerminatorInst *Br = OrigBB->getTerminator(); assert(Br && Br->getOpcode() == Instruction::Br && "splitBasicBlock broken!"); Br->setOperand(0, FirstNewBlock); // Now that the function is correct, make it a little bit nicer. In // particular, move the basic blocks inserted from the end of the function // into the space made by splitting the source basic block. Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(), FirstNewBlock, Caller->end()); // Handle all of the return instructions that we just cloned in, and eliminate // any users of the original call/invoke instruction. Type *RTy = CalledFunc->getReturnType(); PHINode *PHI = 0; if (Returns.size() > 1) { // The PHI node should go at the front of the new basic block to merge all // possible incoming values. if (!TheCall->use_empty()) { PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), AfterCallBB->begin()); // Anything that used the result of the function call should now use the // PHI node as their operand. TheCall->replaceAllUsesWith(PHI); } // Loop over all of the return instructions adding entries to the PHI node // as appropriate. if (PHI) { for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; assert(RI->getReturnValue()->getType() == PHI->getType() && "Ret value not consistent in function!"); PHI->addIncoming(RI->getReturnValue(), RI->getParent()); } } // Add a branch to the merge points and remove return instructions. DebugLoc Loc; for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; BranchInst* BI = BranchInst::Create(AfterCallBB, RI); Loc = RI->getDebugLoc(); BI->setDebugLoc(Loc); RI->eraseFromParent(); } // We need to set the debug location to *somewhere* inside the // inlined function. The line number may be nonsensical, but the // instruction will at least be associated with the right // function. 
if (CreatedBranchToNormalDest) CreatedBranchToNormalDest->setDebugLoc(Loc); } else if (!Returns.empty()) { // Otherwise, if there is exactly one return value, just replace anything // using the return value of the call with the computed value. if (!TheCall->use_empty()) { if (TheCall == Returns[0]->getReturnValue()) TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); else TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); } // Update PHI nodes that use the ReturnBB to use the AfterCallBB. BasicBlock *ReturnBB = Returns[0]->getParent(); ReturnBB->replaceAllUsesWith(AfterCallBB); // Splice the code from the return block into the block that it will return // to, which contains the code that was after the call. AfterCallBB->getInstList().splice(AfterCallBB->begin(), ReturnBB->getInstList()); if (CreatedBranchToNormalDest) CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); // Delete the return instruction now and empty ReturnBB now. Returns[0]->eraseFromParent(); ReturnBB->eraseFromParent(); } else if (!TheCall->use_empty()) { // No returns, but something is using the return value of the call. Just // nuke the result. TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); } // Since we are now done with the Call/Invoke, we can delete it. TheCall->eraseFromParent(); // We should always be able to fold the entry block of the function into the // single predecessor of the block... assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0); // Splice the code entry block into calling block, right before the // unconditional branch. CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes OrigBB->getInstList().splice(Br, CalleeEntry->getInstList()); // Remove the unconditional branch. OrigBB->getInstList().erase(Br); // Now we can remove the CalleeEntry block, which is now empty. Caller->getBasicBlockList().erase(CalleeEntry); // If we inserted a phi node, check to see if it has a single value (e.g. all // the entries are the same or undef). If so, remove the PHI so it doesn't // block other optimizations. if (PHI) { if (Value *V = SimplifyInstruction(PHI, IFI.TD)) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); } } return true; }
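// Hypothetical usage sketch for this newer overload (names illustrative; in
// this revision InlineFunctionInfo typically carries the optional call graph
// and TargetData used above):
//   InlineFunctionInfo IFI(/*CG=*/0, /*TD=*/0);
//   if (InlineFunction(CS, IFI, /*InsertLifetime=*/true))
//     ++NumInlined;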
// FIXME: This might have been relevant for the old JIT but the MCJIT
// has a completely different implementation so this comment below is
// likely irrelevant and misleading.
//
// For performance purposes we construct the stub in such a way that the
// arguments pointer is passed through the static global variable gTheArgsP in
// this file. This is done so that the stub function prototype trivially matches
// the special cases that the JIT knows how to directly call. If this is not
// done, then the jit will end up generating a nullary stub just to call our
// stub, for every single function call.
Function *ExternalDispatcherImpl::createDispatcher(Function *target,
                                                   Instruction *inst,
                                                   Module *module) {
  if (!resolveSymbol(target->getName()))
    return 0;

  CallSite cs;
  if (inst->getOpcode() == Instruction::Call) {
    cs = CallSite(cast<CallInst>(inst));
  } else {
    cs = CallSite(cast<InvokeInst>(inst));
  }

  Value **args = new Value *[cs.arg_size()];

  std::vector<Type *> nullary;

  // MCJIT functions need unique names, or the wrong function can be called.
  // The module identifier is included because for the MCJIT we need
  // unique function names across all `llvm::Module`s.
  std::string fnName =
      "dispatcher_" + target->getName().str() + module->getModuleIdentifier();
  Function *dispatcher =
      Function::Create(FunctionType::get(Type::getVoidTy(ctx), nullary, false),
                       GlobalVariable::ExternalLinkage, fnName, module);

  BasicBlock *dBB = BasicBlock::Create(ctx, "entry", dispatcher);

  // Get a Value* for &gTheArgsP, as an i64**.
  Instruction *argI64sp = new IntToPtrInst(
      ConstantInt::get(Type::getInt64Ty(ctx), (uintptr_t)(void *)&gTheArgsP),
      PointerType::getUnqual(PointerType::getUnqual(Type::getInt64Ty(ctx))),
      "argsp", dBB);
  Instruction *argI64s = new LoadInst(argI64sp, "args", dBB);

  // Get the target function type.
  FunctionType *FTy = cast<FunctionType>(
      cast<PointerType>(target->getType())->getElementType());

  // Each argument will be passed by writing it into gTheArgsP[i].
  unsigned i = 0, idx = 2;
  for (CallSite::arg_iterator ai = cs.arg_begin(), ae = cs.arg_end(); ai != ae;
       ++ai, ++i) {
    // Determine the type the argument will be passed as. This accommodates for
    // the corresponding code in Executor.cpp for handling calls to bitcasted
    // functions.
    Type *argTy =
        (i < FTy->getNumParams() ? FTy->getParamType(i) : (*ai)->getType());
    Instruction *argI64p = GetElementPtrInst::Create(
        KLEE_LLVM_GEP_TYPE(nullptr) argI64s,
        ConstantInt::get(Type::getInt32Ty(ctx), idx), "", dBB);

    Instruction *argp =
        new BitCastInst(argI64p, PointerType::getUnqual(argTy), "", dBB);
    args[i] = new LoadInst(argp, "", dBB);

    unsigned argSize = argTy->getPrimitiveSizeInBits();
    idx += ((!!argSize ? argSize : 64) + 63) / 64;
  }

  Constant *dispatchTarget = module->getOrInsertFunction(
      target->getName(), FTy, target->getAttributes());

  Instruction *result = CallInst::Create(
      dispatchTarget, llvm::ArrayRef<Value *>(args, args + i), "", dBB);

  if (result->getType() != Type::getVoidTy(ctx)) {
    Instruction *resp = new BitCastInst(
        argI64s, PointerType::getUnqual(result->getType()), "", dBB);
    new StoreInst(result, resp, dBB);
  }

  ReturnInst::Create(ctx, dBB);

  delete[] args;

  return dispatcher;
}
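// For reference, the generated dispatcher is roughly equivalent to the
// following pseudo-C (a sketch only; the slot indexing starting at 2 and the
// write-back of the result to the start of the buffer follow the code above,
// everything else is illustrative):
//   void dispatcher_<target><module-id>() {
//     uint64_t *args = *gTheArgsP;
//     /* argument i is loaded from &args[idx]; idx advances by the argument's
//        size rounded up to 64-bit words (at least one word) */
//     result = target(arg0, arg1, ...);
//     if (/* result is non-void */) /* store result at the start of args */;
//   }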
int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
  // Get information about the callee.
  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];

  // If we haven't calculated this information yet, do so now.
  if (CalleeFI->Metrics.NumBlocks == 0)
    CalleeFI->analyzeFunction(Callee, TD);

  // InlineCost - This value measures how good of an inline candidate this call
  // site is to inline.  A lower inline cost makes it more likely for the call
  // to be inlined.  This value may go negative.
  //
  int InlineCost = 0;

  // Compute any size reductions we can expect due to arguments being passed
  // into the function.
  //
  unsigned ArgNo = 0;
  CallSite::arg_iterator I = CS.arg_begin();
  for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
       FI != FE; ++I, ++FI, ++ArgNo) {

    // If an alloca is passed in, inlining this function is likely to allow
    // significant future optimization possibilities (like scalar promotion,
    // and scalarization), so encourage the inlining of the function.
    //
    if (isa<AllocaInst>(I))
      InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight;

    // If this is a constant being passed into the function, use the argument
    // weights calculated for the callee to determine how much will be folded
    // away with this information.
    else if (isa<Constant>(I))
      InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
  }

  const DenseMap<std::pair<unsigned, unsigned>, unsigned> &ArgPairWeights =
      CalleeFI->PointerArgPairWeights;
  for (DenseMap<std::pair<unsigned, unsigned>, unsigned>::const_iterator
           I = ArgPairWeights.begin(), E = ArgPairWeights.end();
       I != E; ++I)
    if (CS.getArgument(I->first.first)->stripInBoundsConstantOffsets() ==
        CS.getArgument(I->first.second)->stripInBoundsConstantOffsets())
      InlineCost -= I->second;

  // Each argument passed in has a cost at both the caller and the callee
  // sides.  Measurements show that each argument costs about the same as an
  // instruction.
  InlineCost -= (CS.arg_size() * InlineConstants::InstrCost);

  // Now that we have considered all of the factors that make the call site
  // more likely to be inlined, look at factors that make us not want to
  // inline it.

  // Calls usually take a long time, so they make the inlining gain smaller.
  InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;

  // Look at the size of the callee.  Each instruction counts as 5.
  InlineCost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;

  return InlineCost;
}
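// Worked example (weights assumed for illustration): with
// InlineConstants::InstrCost == 5 and InlineConstants::CallPenalty == 25, a
// call site passing 3 arguments to a callee containing 2 calls and 40
// instructions, with no argument- or pair-based bonuses, would score
//   InlineCost = -(3 * 5) + (2 * 25) + (40 * 5) = -15 + 50 + 200 = 235.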
InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee, SmallPtrSet<const Function*, 16> &NeverInline) { Instruction *TheCall = CS.getInstruction(); Function *Caller = TheCall->getParent()->getParent(); bool isDirectCall = CS.getCalledFunction() == Callee; // Don't inline functions which can be redefined at link-time to mean // something else. Don't inline functions marked noinline or call sites // marked noinline. if (Callee->mayBeOverridden() || Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) || CS.isNoInline()) return llvm::InlineCost::getNever(); // Get information about the callee. FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; // If we haven't calculated this information yet, do so now. if (CalleeFI->Metrics.NumBlocks == 0) CalleeFI->analyzeFunction(Callee); // If we should never inline this, return a huge cost. if (CalleeFI->NeverInline()) return InlineCost::getNever(); // FIXME: It would be nice to kill off CalleeFI->NeverInline. Then we // could move this up and avoid computing the FunctionInfo for // things we are going to just return always inline for. This // requires handling setjmp somewhere else, however. if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline)) return InlineCost::getAlways(); if (CalleeFI->Metrics.usesDynamicAlloca) { // Get infomation about the caller. FunctionInfo &CallerFI = CachedFunctionInfo[Caller]; // If we haven't calculated this information yet, do so now. if (CallerFI.Metrics.NumBlocks == 0) { CallerFI.analyzeFunction(Caller); // Recompute the CalleeFI pointer, getting Caller could have invalidated // it. CalleeFI = &CachedFunctionInfo[Callee]; } // Don't inline a callee with dynamic alloca into a caller without them. // Functions containing dynamic alloca's are inefficient in various ways; // don't create more inefficiency. if (!CallerFI.Metrics.usesDynamicAlloca) return InlineCost::getNever(); } // InlineCost - This value measures how good of an inline candidate this call // site is to inline. A lower inline cost make is more likely for the call to // be inlined. This value may go negative. // int InlineCost = 0; // Add to the inline quality for properties that make the call valuable to // inline. This includes factors that indicate that the result of inlining // the function will be optimizable. Currently this just looks at arguments // passed into the function. // unsigned ArgNo = 0; CallSite::arg_iterator I = CS.arg_begin(); for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end(); FI != FE; ++I, ++FI, ++ArgNo) { // If an alloca is passed in, inlining this function is likely to allow // significant future optimization possibilities (like scalar promotion, and // scalarization), so encourage the inlining of the function. // if (isa<AllocaInst>(I)) InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight; // If this is a constant being passed into the function, use the argument // weights calculated for the callee to determine how much will be folded // away with this information. else if (isa<Constant>(I)) { InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight; // Compute any constant bonus due to inlining we want to give here. InlineCost -= CountBonusForConstant(FI); } } // Each argument passed in has a cost at both the caller and the callee // sides. Measurements show that each argument costs about the same as an // instruction. 
InlineCost -= (CS.arg_size() * InlineConstants::InstrCost); // If there is only one call of the function, and it has internal linkage, // make it almost guaranteed to be inlined. // if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall) InlineCost += InlineConstants::LastCallToStaticBonus; // Now that we have considered all of the factors that make the call site more // likely to be inlined, look at factors that make us not want to inline it. // If the instruction after the call, or if the normal destination of the // invoke is an unreachable instruction, the function is noreturn. As such, // there is little point in inlining this. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { if (isa<UnreachableInst>(II->getNormalDest()->begin())) InlineCost += InlineConstants::NoreturnPenalty; } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall))) InlineCost += InlineConstants::NoreturnPenalty; // If this function uses the coldcc calling convention, prefer not to inline // it. if (Callee->getCallingConv() == CallingConv::Cold) InlineCost += InlineConstants::ColdccPenalty; // Calls usually take a long time, so they make the inlining gain smaller. InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty; // Look at the size of the callee. Each instruction counts as 5. InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost; return llvm::InlineCost::get(InlineCost); }
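Taken together, getInlineCost reduces to a signed sum of bonuses (negative contributions) and penalties (positive contributions) that the caller then compares against a threshold. The sketch below models that composition with illustrative constants; the real values live in InlineConstants and may differ:

// Illustrative constants; the real values live in InlineConstants.
enum {
  InstrCost = 5,
  CallPenalty = 25,
  ColdccPenalty = 2000,
  NoreturnPenalty = 10000,
  LastCallToStaticBonus = -15000
};

struct CallSiteFacts {
  unsigned NumArgs;
  unsigned CalleeInsts;
  unsigned CalleeCalls;
  bool OnlyCallOfLocalFunction; // single use of an internal function
  bool CalleeIsColdcc;
  bool CallIsNoreturn;          // next instruction / normal dest unreachable
};

int scoreCallSite(const CallSiteFacts &F) {
  int Cost = 0;
  Cost -= static_cast<int>(F.NumArgs) * InstrCost;       // argument setup bonus
  if (F.OnlyCallOfLocalFunction)
    Cost += LastCallToStaticBonus;                        // large negative bonus
  if (F.CallIsNoreturn)
    Cost += NoreturnPenalty;
  if (F.CalleeIsColdcc)
    Cost += ColdccPenalty;
  Cost += static_cast<int>(F.CalleeCalls) * CallPenalty;  // calls stay expensive
  Cost += static_cast<int>(F.CalleeInsts) * InstrCost;    // raw callee size
  return Cost;                                            // lower is better
}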
/// \brief Analyze a call site for potential inlining. /// /// Returns true if inlining this call is viable, and false if it is not /// viable. It computes the cost and adjusts the threshold based on numerous /// factors and heuristics. If this method returns false but the computed cost /// is below the computed threshold, then inlining was forcibly disabled by /// some artifact of the routine. bool CallAnalyzer::analyzeCall(CallSite CS) { ++NumCallsAnalyzed; // Track whether the post-inlining function would have more than one basic // block. A single basic block is often intended for inlining. Balloon the // threshold by 50% until we pass the single-BB phase. bool SingleBB = true; int SingleBBBonus = Threshold / 2; Threshold += SingleBBBonus; // Perform some tweaks to the cost and threshold based on the direct // callsite information. // We want to more aggressively inline vector-dense kernels, so up the // threshold, and we'll lower it if the % of vector instructions gets too // low. assert(NumInstructions == 0); assert(NumVectorInstructions == 0); FiftyPercentVectorBonus = Threshold; TenPercentVectorBonus = Threshold / 2; // Give out bonuses per argument, as the instructions setting them up will // be gone after inlining. for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { if (TD && CS.isByValArgument(I)) { // We approximate the number of loads and stores needed by dividing the // size of the byval type by the target's pointer size. PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType()); unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType()); unsigned PointerSize = TD->getPointerSizeInBits(); // Ceiling division. unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; // If it generates more than 8 stores it is likely to be expanded as an // inline memcpy so we take that as an upper bound. Otherwise we assume // one load and one store per word copied. // FIXME: The maxStoresPerMemcpy setting from the target should be used // here instead of a magic number of 8, but it's not available via // DataLayout. NumStores = std::min(NumStores, 8U); Cost -= 2 * NumStores * InlineConstants::InstrCost; } else { // For non-byval arguments subtract off one instruction per call // argument. Cost -= InlineConstants::InstrCost; } } // If there is only one call of the function, and it has internal linkage, // the cost of inlining it drops dramatically. bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction(); if (OnlyOneCallAndLocalLinkage) Cost += InlineConstants::LastCallToStaticBonus; // If the instruction after the call, or if the normal destination of the // invoke is an unreachable instruction, the function is noreturn. As such, // there is little point in inlining this unless there is literally zero // cost. Instruction *Instr = CS.getInstruction(); if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) { if (isa<UnreachableInst>(II->getNormalDest()->begin())) Threshold = 1; } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr))) Threshold = 1; // If this function uses the coldcc calling convention, prefer not to inline // it. if (F.getCallingConv() == CallingConv::Cold) Cost += InlineConstants::ColdccPenalty; // Check if we're done. This can happen due to bonuses and penalties. if (Cost > Threshold) return false; if (F.empty()) return true; Function *Caller = CS.getInstruction()->getParent()->getParent(); // Check if the caller function is recursive itself. 
for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end(); U != E; ++U) { CallSite Site(cast<Value>(*U)); if (!Site) continue; Instruction *I = Site.getInstruction(); if (I->getParent()->getParent() == Caller) { IsCallerRecursive = true; break; } } // Track whether we've seen a return instruction. The first return // instruction is free, as at least one will usually disappear in inlining. bool HasReturn = false; // Populate our simplified values by mapping from function arguments to call // arguments with known important simplifications. CallSite::arg_iterator CAI = CS.arg_begin(); for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end(); FAI != FAE; ++FAI, ++CAI) { assert(CAI != CS.arg_end()); if (Constant *C = dyn_cast<Constant>(CAI)) SimplifiedValues[FAI] = C; Value *PtrArg = *CAI; if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) { ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue()); // We can SROA any pointer arguments derived from alloca instructions. if (isa<AllocaInst>(PtrArg)) { SROAArgValues[FAI] = PtrArg; SROAArgCosts[PtrArg] = 0; } } } NumConstantArgs = SimplifiedValues.size(); NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); NumAllocaArgs = SROAArgValues.size(); // The worklist of live basic blocks in the callee *after* inlining. We avoid // adding basic blocks of the callee which can be proven to be dead for this // particular call site in order to get more accurate cost estimates. This // requires a somewhat heavyweight iteration pattern: we need to walk the // basic blocks in a breadth-first order as we insert live successors. To // accomplish this, prioritizing for small iterations because we exit after // crossing our threshold, we use a small-size optimized SetVector. typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>, SmallPtrSet<BasicBlock *, 16> > BBSetVector; BBSetVector BBWorklist; BBWorklist.insert(&F.getEntryBlock()); // Note that we *must not* cache the size, this loop grows the worklist. for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { // Bail out the moment we cross the threshold. This means we'll under-count // the cost, but only when undercounting doesn't matter. if (Cost > (Threshold + VectorBonus)) break; BasicBlock *BB = BBWorklist[Idx]; if (BB->empty()) continue; // Handle the terminator cost here where we can track returns and other // function-wide constructs. TerminatorInst *TI = BB->getTerminator(); // We never want to inline functions that contain an indirectbr. This is // incorrect because all the blockaddress's (in static global initializers // for example) would be referring to the original function, and this // indirect jump would jump from the inlined copy of the function into the // original function which is extremely undefined behavior. // FIXME: This logic isn't really right; we can safely inline functions // with indirectbr's as long as no other function or global references the // blockaddress of a block within the current function. And as a QOI issue, // if someone is using a blockaddress without an indirectbr, and that // reference somehow ends up in another function or global, we probably // don't want to inline this function. if (isa<IndirectBrInst>(TI)) return false; if (!HasReturn && isa<ReturnInst>(TI)) HasReturn = true; else Cost += InlineConstants::InstrCost; // Analyze the cost of this block. If we blow through the threshold, this // returns false, and we can bail on out. 
if (!analyzeBlock(BB)) { if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) return false; // If the caller is a recursive function then we don't want to inline // functions which allocate a lot of stack space because it would increase // the caller stack usage dramatically. if (IsCallerRecursive && AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) return false; break; } // Add in the live successors by first checking whether we have a terminator // that may be simplified based on the values simplified by this call. if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { if (BI->isConditional()) { Value *Cond = BI->getCondition(); if (ConstantInt *SimpleCond = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0)); continue; } } } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { Value *Cond = SI->getCondition(); if (ConstantInt *SimpleCond = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor()); continue; } } // If we're unable to select a particular successor, just count all of // them. for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; ++TIdx) BBWorklist.insert(TI->getSuccessor(TIdx)); // If we had any successors at this point, then post-inlining is likely to // have them as well. Note that we assume any basic blocks which existed // due to branches or switches which folded above will also fold after // inlining. if (SingleBB && TI->getNumSuccessors() > 1) { // Take off the bonus we applied to the threshold. Threshold -= SingleBBBonus; SingleBB = false; } } // If this is a noduplicate call, we can still inline as long as // inlining this would cause the removal of the caller (so the instruction // is not actually duplicated, just moved). if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall) return false; Threshold += VectorBonus; return Cost < Threshold; }
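The block worklist in analyzeCall is essentially a breadth-first walk that refuses to enqueue successors proven dead once argument constants are substituted. A small self-contained model of that traversal, using an invented Block/CFG representation rather than LLVM IR:

#include <cstddef>
#include <map>
#include <optional>
#include <set>
#include <vector>

struct Block {
  // If the terminator is a conditional branch whose condition folded to a
  // constant, KnownCond selects exactly one successor; otherwise all
  // successors are considered live.
  std::optional<bool> KnownCond;
  std::vector<int> Succs; // Succs[0] is taken when KnownCond == true
};

// Returns the set of blocks reachable from Entry under the simplification.
std::set<int> liveBlocks(const std::map<int, Block> &CFG, int Entry) {
  std::set<int> Visited;
  std::vector<int> Worklist{Entry};
  // Like the SetVector loop above, the worklist grows while we iterate.
  for (std::size_t Idx = 0; Idx != Worklist.size(); ++Idx) {
    int BB = Worklist[Idx];
    if (!Visited.insert(BB).second)
      continue;
    const Block &B = CFG.at(BB);
    auto push = [&](int Succ) {
      if (!Visited.count(Succ))
        Worklist.push_back(Succ);
    };
    if (B.KnownCond && B.Succs.size() == 2)
      push(B.Succs[*B.KnownCond ? 0 : 1]); // the dead edge is never enqueued
    else
      for (int Succ : B.Succs)
        push(Succ);
  }
  return Visited;
}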
/// performCallSlotOptzn - takes a memcpy and a call that it depends on, /// and checks for the possibility of a call slot optimization by having /// the call write its result directly into the destination of the memcpy. bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { // The general transformation to keep in mind is // // call @func(..., src, ...) // memcpy(dest, src, ...) // // -> // // memcpy(dest, src, ...) // call @func(..., dest, ...) // // Since moving the memcpy is technically awkward, we additionally check that // src only holds uninitialized values at the moment of the call, meaning that // the memcpy can be discarded rather than moved. // Deliberately get the source and destination with bitcasts stripped away, // because we'll need to do type comparisons based on the underlying type. Value *cpyDest = cpy->getDest(); Value *cpySrc = cpy->getSource(); CallSite CS = CallSite::get(C); // We need to be able to reason about the size of the memcpy, so we require // that it be a constant. ConstantInt *cpyLength = dyn_cast<ConstantInt>(cpy->getLength()); if (!cpyLength) return false; // Require that src be an alloca. This simplifies the reasoning considerably. AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc); if (!srcAlloca) return false; // Check that all of src is copied to dest. TargetData *TD = getAnalysisIfAvailable<TargetData>(); if (!TD) return false; ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize()); if (!srcArraySize) return false; uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) * srcArraySize->getZExtValue(); if (cpyLength->getZExtValue() < srcSize) return false; // Check that accessing the first srcSize bytes of dest will not cause a // trap. Otherwise the transform is invalid since it might cause a trap // to occur earlier than it otherwise would. if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) { // The destination is an alloca. Check it is larger than srcSize. ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize()); if (!destArraySize) return false; uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) * destArraySize->getZExtValue(); if (destSize < srcSize) return false; } else if (Argument *A = dyn_cast<Argument>(cpyDest)) { // If the destination is an sret parameter then only accesses that are // outside of the returned struct type can trap. if (!A->hasStructRetAttr()) return false; const Type *StructTy = cast<PointerType>(A->getType())->getElementType(); uint64_t destSize = TD->getTypeAllocSize(StructTy); if (destSize < srcSize) return false; } else { return false; } // Check that src is not accessed except via the call and the memcpy. This // guarantees that it holds only undefined values when passed in (so the final // memcpy can be dropped), that it is not read or written between the call and // the memcpy, and that writing beyond the end of it is undefined. 
SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(), srcAlloca->use_end()); while (!srcUseList.empty()) { User *UI = srcUseList.pop_back_val(); if (isa<BitCastInst>(UI)) { for (User::use_iterator I = UI->use_begin(), E = UI->use_end(); I != E; ++I) srcUseList.push_back(*I); } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) { if (G->hasAllZeroIndices()) for (User::use_iterator I = UI->use_begin(), E = UI->use_end(); I != E; ++I) srcUseList.push_back(*I); else return false; } else if (UI != C && UI != cpy) { return false; } } // Since we're changing the parameter to the callsite, we need to make sure // that what would be the new parameter dominates the callsite. DominatorTree &DT = getAnalysis<DominatorTree>(); if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest)) if (!DT.dominates(cpyDestInst, C)) return false; // In addition to knowing that the call does not access src in some // unexpected manner, for example via a global, which we deduce from // the use analysis, we also need to know that it does not sneakily // access dest. We rely on AA to figure this out for us. AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); if (AA.getModRefInfo(C, cpy->getRawDest(), srcSize) != AliasAnalysis::NoModRef) return false; // All the checks have passed, so do the transformation. bool changedArgument = false; for (unsigned i = 0; i < CS.arg_size(); ++i) if (CS.getArgument(i)->stripPointerCasts() == cpySrc) { if (cpySrc->getType() != cpyDest->getType()) cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(), cpyDest->getName(), C); changedArgument = true; if (CS.getArgument(i)->getType() == cpyDest->getType()) CS.setArgument(i, cpyDest); else CS.setArgument(i, CastInst::CreatePointerCast(cpyDest, CS.getArgument(i)->getType(), cpyDest->getName(), C)); } if (!changedArgument) return false; // Drop any cached information about the call, because we may have changed // its dependence information by changing its parameter. MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>(); MD.removeInstruction(C); // Remove the memcpy MD.removeInstruction(cpy); cpy->eraseFromParent(); ++NumMemCpyInstr; return true; }
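At the source level, the call slot optimization corresponds to eliding a temporary that only exists to stage a call result before it is memcpy'd into its final destination. A hedged C++ illustration (the Big struct and producer function are made up):

#include <cstring>

struct Big { char bytes[256]; };

// Stand-in for the callee; assumed to fully initialize *out.
void producer(Big *out) { std::memset(out, 0, sizeof(Big)); }

// Before: the result is built in a local and then copied into 'dest'.
void beforeOpt(Big *dest) {
  Big tmp;                               // srcAlloca in the code above
  producer(&tmp);                        // the call C
  std::memcpy(dest, &tmp, sizeof(Big));  // the memcpy 'cpy'
}

// After: once the checks pass (tmp has no other uses, dest is known to be
// dereferenceable for sizeof(Big) bytes, and the call cannot alias dest),
// the call writes straight into the destination and the copy is dropped.
void afterOpt(Big *dest) {
  producer(dest);
}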
CallSite SancusModuleCreator::handleSancusCall(CallSite cs) { assert(cs.arg_size() >= 2 && "sancus_call needs at least 2 arguments"); auto numAsmArgs = cs.arg_size() - 2; if (numAsmArgs > 3) { report_fatal_error("Currently, maximum 3 arguments are supported by " "sancus_call"); } auto argTys = std::vector<Type*>{voidPtrTy, wordTy}; auto entryVal = cs.getArgument(0); auto indexVal = cs.getArgument(1); auto args = std::vector<Value*>{entryVal, indexVal}; auto inputConstraints = std::string{",r,r"}; std::ostringstream argsAsm; auto regsUsage = unsigned{0x10}; auto clobbers = std::string{",~{r6},~{r7},~{r8},~{r15}"}; auto callerInfo = getSancusModuleInfo(cs.getCaller()); for (decltype(numAsmArgs) i = 0; i < numAsmArgs; i++) { auto arg = cs.getArgument(i + 2); auto argTy = arg->getType(); // TODO FunctionCcInfo should be used to check the argument. This is not // possible currently since it takes a Function* as argument. if (argTy->getPrimitiveSizeInBits() != 16 && !argTy->isPointerTy()) report_fatal_error("Illegal argument type in call to sancus_call"); inputConstraints += ",r"; std::ostringstream regStream; regStream << "r" << 15 - i; auto regStr = regStream.str(); if (regStr != "r15") clobbers += ",~{" + regStr + "}"; argsAsm << "mov $" << i + 3 << ", " << regStr << "\n\t"; args.push_back(arg); argTys.push_back(argTy); // see sm_exit.s to understand how this value is used. regsUsage >>= 1; } auto asmStr = Twine("push #1f\n\t" "push r6\n\t" "push r7\n\t" "push r8\n\t" "mov $2, r6\n\t" "mov #" + Twine(regsUsage) + ", r7\n\t" "mov $1, r8\n\t" + argsAsm.str() + "br #" + callerInfo.getExitName() + "\n" "1:\n\t" "mov r15, $0" ).str(); auto constraintsStr = "=r" + inputConstraints + clobbers; auto resTy = wordTy; auto asmFuncTy = FunctionType::get(resTy, argTys, /*isVarArg=*/false); // NOTE hasSideEffects has to be true because the inline assembly may be // optimised away otherwise auto inlineAsm = InlineAsm::get(asmFuncTy, asmStr, constraintsStr, /*hasSideEffects=*/true); auto asmCall = CallInst::Create(inlineAsm, args); return CallSite(asmCall); }
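The inline-asm bookkeeping in handleSancusCall is easier to follow in isolation. This sketch reproduces only the per-argument pieces: the constraint string grows by one ",r" per argument, registers r15 down to r13 receive the values, every used register except r15 joins the clobber list, and the 0x10 usage mask shifts right once per argument (its consumer is sm_exit.s, which is not shown here). The AsmPieces struct is an invented helper:

#include <cassert>
#include <sstream>
#include <string>

struct AsmPieces {
  std::string InputConstraints; // starts as ",r,r" for the entry/index args
  std::string Clobbers;
  std::string ArgMoves;         // one "mov $N, rM" per extra argument
  unsigned RegsUsage;
};

AsmPieces buildArgPieces(unsigned NumAsmArgs) {
  assert(NumAsmArgs <= 3 && "only r15..r13 are available");
  AsmPieces P{",r,r", ",~{r6},~{r7},~{r8},~{r15}", "", 0x10};
  std::ostringstream Moves;
  for (unsigned I = 0; I < NumAsmArgs; ++I) {
    P.InputConstraints += ",r";
    std::string Reg = "r" + std::to_string(15 - I);
    if (Reg != "r15")                       // r15 is already in the clobbers
      P.Clobbers += ",~{" + Reg + "}";
    Moves << "mov $" << I + 3 << ", " << Reg << "\n\t";
    P.RegsUsage >>= 1;                      // interpreted by sm_exit.s
  }
  P.ArgMoves = Moves.str();
  return P;
}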
void GraphBuilder::visitCallSite(CallSite CS) { // // Get the called value. Strip off any casts which are lossless. // Value *Callee = CS.getCalledValue()->stripPointerCasts(); // Special case handling of certain libc allocation functions here. if (Function *F = dyn_cast<Function>(Callee)) if (F->isIntrinsic() && visitIntrinsic(CS, F)) return; //Can't do much about inline asm (yet!) if (isa<InlineAsm> (Callee)) { ++NumAsmCall; DSNodeHandle RetVal; Instruction *I = CS.getInstruction(); if (isa<PointerType > (I->getType())) RetVal = getValueDest(I); // Calculate the arguments vector... for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; ++I) if (isa<PointerType > ((*I)->getType())) RetVal.mergeWith(getValueDest(*I)); if (!RetVal.isNull()) RetVal.getNode()->foldNodeCompletely(); return; } // Set up the return value... DSNodeHandle RetVal; Instruction *I = CS.getInstruction(); if (isa<PointerType>(I->getType())) RetVal = getValueDest(I); DSNode *CalleeNode = 0; if (!isa<Function>(Callee)) { CalleeNode = getValueDest(Callee).getNode(); if (CalleeNode == 0) { DEBUG(errs() << "WARNING: Program is calling through a null pointer?\n" << *I); return; // Calling a null pointer? } } // NOTE: This code is identical to 'DSGraph::getDSCallSiteForCallSite', // the reason it's duplicated is because this calls getValueDest instead // of getNodeForValue to get the DSNodes for the arguments. Since we're in // local it's possible that we need to create a DSNode for the argument, as // opposed to getNodeForValue which simply retrieves the existing node. //Get the FunctionType for the called function const FunctionType *CalleeFuncType = DSCallSite::FunctionTypeOfCallSite(CS); int NumFixedArgs = CalleeFuncType->getNumParams(); // Sanity check--this really, really shouldn't happen if (!CalleeFuncType->isVarArg()) assert(CS.arg_size() == static_cast<unsigned>(NumFixedArgs) && "Too many arguments/incorrect function signature!"); std::vector<DSNodeHandle> Args; Args.reserve(CS.arg_end()-CS.arg_begin()); DSNodeHandle VarArgNH; // Calculate the arguments vector... // Add all fixed pointer arguments, then merge the rest together for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); I != E; ++I) if (isa<PointerType>((*I)->getType())) { DSNodeHandle ArgNode = getValueDest(*I); if (I - CS.arg_begin() < NumFixedArgs) { Args.push_back(ArgNode); } else { VarArgNH.mergeWith(ArgNode); } } // Add a new function call entry... if (CalleeNode) { ++NumIndirectCall; G.getFunctionCalls().push_back(DSCallSite(CS, RetVal, VarArgNH, CalleeNode, Args)); } else { ++NumDirectCall; G.getFunctionCalls().push_back(DSCallSite(CS, RetVal, VarArgNH, cast<Function>(Callee), Args)); } }
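The fixed-versus-variadic split in the argument loop above can be modeled without the DSA machinery: pointer arguments in fixed positions each keep their own node, while pointers passed through the '...' part all merge into one vararg node. A toy model with an invented PtrNode type:

#include <cstddef>
#include <optional>
#include <vector>

struct PtrNode { int Id; };
// One entry per call argument; non-pointer arguments carry no node.
using Arg = std::optional<PtrNode>;

struct SplitArgs {
  std::vector<PtrNode> Fixed;  // nodes for pointer args in fixed positions
  std::vector<PtrNode> VarArg; // pointer args passed through the '...'
};

SplitArgs splitPointerArgs(const std::vector<Arg> &Args,
                           std::size_t NumFixedArgs) {
  SplitArgs Out;
  for (std::size_t I = 0; I != Args.size(); ++I) {
    if (!Args[I])
      continue;                        // only pointer-typed arguments matter
    if (I < NumFixedArgs)
      Out.Fixed.push_back(*Args[I]);   // kept as an individual node handle
    else
      Out.VarArg.push_back(*Args[I]);  // all merged into one vararg handle
  }
  return Out;
}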
/// analyzeBasicBlock - Fill in the current structure with information gleaned /// from the specified block. void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { ++NumBlocks; unsigned NumInstsBeforeThisBB = NumInsts; for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); II != E; ++II) { if (isa<PHINode>(II)) continue; // PHI nodes don't count. // Special handling for calls. if (isa<CallInst>(II) || isa<InvokeInst>(II)) { if (isa<DbgInfoIntrinsic>(II)) continue; // Debug intrinsics don't count as size. CallSite CS = CallSite::get(const_cast<Instruction*>(&*II)); // If this function contains a call to setjmp or _setjmp, never inline // it. This is a hack because we depend on the user marking their local // variables as volatile if they are live across a setjmp call, and they // probably won't do this in callers. if (Function *F = CS.getCalledFunction()) { if (F->isDeclaration() && (F->getName() == "setjmp" || F->getName() == "_setjmp")) callsSetJmp = true; // If this call is to the function itself, then the function is recursive. // Inlining it into other functions is a bad idea, because this is // basically just a form of loop peeling, and our metrics aren't useful // for that case. if (F == BB->getParent()) isRecursive = true; } if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) { // Each argument to a call takes on average one instruction to set up. NumInsts += CS.arg_size(); // We don't want inline asm to count as a call - that would prevent loop // unrolling. The argument setup cost is still real, though. if (!isa<InlineAsm>(CS.getCalledValue())) ++NumCalls; } } if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { if (!AI->isStaticAlloca()) this->usesDynamicAlloca = true; } if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy()) ++NumVectorInsts; if (const CastInst *CI = dyn_cast<CastInst>(II)) { // Noop casts, including ptr <-> int, don't count. if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || isa<PtrToIntInst>(CI)) continue; // Result of a cmp instruction is often extended (to be used by other // cmp instructions, logical or return instructions). These are usually // nop on most sane targets. if (isa<CmpInst>(CI->getOperand(0))) continue; } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){ // If a GEP has all constant indices, it will probably be folded with // a load/store. if (GEPI->hasAllConstantIndices()) continue; } ++NumInsts; } if (isa<ReturnInst>(BB->getTerminator())) ++NumRets; // We never want to inline functions that contain an indirectbr. This is // incorrect because all the blockaddress's (in static global initializers // for example) would be referring to the original function, and this indirect // jump would jump from the inlined copy of the function into the original // function which is extremely undefined behavior. if (isa<IndirectBrInst>(BB->getTerminator())) containsIndirectBr = true; // Remember NumInsts for this BB. NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB; }
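analyzeBasicBlock estimates size by skipping instructions that normally vanish during code generation. That filter can be summarized as a predicate over instruction kinds; the InstKind enum below is a made-up abstraction, not an LLVM type:

// Hypothetical abstraction over the cases analyzeBasicBlock skips.
enum class InstKind {
  Phi,              // PHI nodes don't count
  DebugIntrinsic,   // debug info has no size
  NoopCast,         // lossless casts, ptr<->int, casts of cmp results
  ConstIndexGEP,    // likely folded into the load/store addressing mode
  Other
};

// Returns true when the instruction is expected to be free after codegen
// and therefore should not be added to NumInsts.
bool isFreeForSizeEstimation(InstKind K) {
  switch (K) {
  case InstKind::Phi:
  case InstKind::DebugIntrinsic:
  case InstKind::NoopCast:
  case InstKind::ConstIndexGEP:
    return true;
  case InstKind::Other:
    return false;
  }
  return false; // unreachable; keeps compilers quiet
}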