/// Evaluate a call to function F, returning true if successful, false if we
/// can't evaluate it.  ActualArgs contains the formal arguments for the
/// function.
bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
                                 const SmallVectorImpl<Constant*> &ActualArgs) {
  // Check to see if this function is already executing (recursion).  If so,
  // bail out.  TODO: we might want to accept limited recursion.
  if (is_contained(CallStack, F))
    return false;

  CallStack.push_back(F);

  // Initialize arguments to the incoming values specified.
  unsigned ArgNo = 0;
  for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
       ++AI, ++ArgNo)
    setVal(&*AI, ActualArgs[ArgNo]);

  // ExecutedBlocks - We only handle non-looping, non-recursive code.  As such,
  // we can only evaluate any one basic block at most once.  This set keeps
  // track of what we have executed so we can detect recursive cases etc.
  SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;

  // CurBB - The current basic block we're evaluating.
  BasicBlock *CurBB = &F->front();

  BasicBlock::iterator CurInst = CurBB->begin();

  while (1) {
    BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings.
    DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");

    if (!EvaluateBlock(CurInst, NextBB))
      return false;

    if (!NextBB) {
      // Successfully running until there's no next block means that we found
      // the return.  Fill in the return value and pop the call stack.
      ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
      if (RI->getNumOperands())
        RetVal = getVal(RI->getOperand(0));
      CallStack.pop_back();
      return true;
    }

    // Okay, we succeeded in evaluating this control flow.  See if we have
    // executed the new block before.  If so, we have a looping function,
    // which we cannot evaluate in reasonable time.
    if (!ExecutedBlocks.insert(NextBB).second)
      return false;  // looped!

    // Okay, we have never been in this block before.  Check to see if there
    // are any PHI nodes.  If so, evaluate them with information about where
    // we came from.
    PHINode *PN = nullptr;
    for (CurInst = NextBB->begin();
         (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
      setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));

    // Advance to the next block.
    CurBB = NextBB;
  }
}
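// Hedged usage sketch for the evaluator above: it is typically driven over a
// nullary function, e.g. while trying to fold a static constructor. `Eval`
// and `F` are assumed to already exist in scope; they are illustrative, not
// part of the original snippet.
Constant *RetVal = nullptr;
SmallVector<Constant *, 1> NoArgs;  // no formal arguments to bind
if (Eval.EvaluateFunction(F, RetVal, NoArgs)) {
  // Evaluation reached a return without looping or recursing; RetVal (if
  // non-null) holds the constant-folded return value.
}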
void InstructionReplace::insertStores(llvm::Module& M) {
  for (llvm::Module::iterator F = M.begin(), ME = M.end(); F != ME; ++F) {
    //if(!F->getFnAttributes().hasAttribute(Attributes::AttrVal::MaskedCopy)) { continue; }
    llvm::Function* Fun = maskedfn[F];
    vector<Value*> outputshares;
    ReturnInst* tbd = NULL;

    // Skip the MaskingOrder+1 input-share arguments, then collect the
    // MaskingOrder+1 output-share pointer arguments.
    auto arg = Fun->arg_begin();
    for (unsigned int i = 0; i <= MaskingOrder; i++) { ++arg; }
    for (unsigned int i = 0; i <= MaskingOrder; i++) {
      outputshares.push_back(arg++);
    }

    for (llvm::Function::iterator BB = Fun->begin(), FE = Fun->end(); BB != FE; ++BB) {
      for (llvm::BasicBlock::iterator i = BB->begin(); i != BB->end(); i++) {
        // Erase the return instruction we replaced on the previous iteration.
        if (tbd != NULL) { tbd->eraseFromParent(); tbd = NULL; }
        if (!isa<ReturnInst>(i)) { continue; }

        // Replace `ret %v` with stores of the shares of %v through the
        // output-share pointers, followed by `ret void`.
        ReturnInst* ri = cast<ReturnInst>(i);
        IRBuilder<> ib = llvm::IRBuilder<>(BB->getContext());
        ib.SetInsertPoint(i);
        vector<Value*> shares = MaskValue(ri->getReturnValue(), ri);
        for (unsigned int s = 0; s <= MaskingOrder; s++) {
          ib.CreateStore(shares[s], outputshares[s]);
        }
        ib.CreateRetVoid();
        tbd = ri;
      }
    }
    if (tbd != NULL) { tbd->eraseFromParent(); tbd = NULL; }
  }
}
CallInst *BasicBlock::getTerminatingMustTailCall() {
  if (InstList.empty())
    return nullptr;
  ReturnInst *RI = dyn_cast<ReturnInst>(&InstList.back());
  if (!RI || RI == &InstList.front())
    return nullptr;

  Instruction *Prev = RI->getPrevNode();
  if (!Prev)
    return nullptr;

  if (Value *RV = RI->getReturnValue()) {
    if (RV != Prev)
      return nullptr;

    // Look through the optional bitcast.
    if (auto *BI = dyn_cast<BitCastInst>(Prev)) {
      RV = BI->getOperand(0);
      Prev = BI->getPrevNode();
      if (!Prev || RV != Prev)
        return nullptr;
    }
  }

  if (auto *CI = dyn_cast<CallInst>(Prev)) {
    if (CI->isMustTailCall())
      return CI;
  }
  return nullptr;
}
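// Hedged usage sketch for the matcher above. The instruction sequence it
// recognizes (IR shape only, example names):
//
//   %v = musttail call i8* @callee(i8* %p)   ; the must-tail call
//   %c = bitcast i8* %v to i8*               ; optional bitcast
//   ret i8* %c                               ; return of (bitcast of) %v
//
// A void musttail call followed directly by `ret void` also matches, since
// the return-value check is skipped when getReturnValue() is null. `BB` is
// an assumed BasicBlock* in scope:
if (CallInst *CI = BB->getTerminatingMustTailCall()) {
  // CI must be emitted as an actual tail call; a verifier or lowering pass
  // can branch on that fact here.
}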
void GCInvariantVerifier::visitReturnInst(ReturnInst &RI) {
  if (!RI.getReturnValue())
    return;
  Type *RTy = RI.getReturnValue()->getType();
  if (!RTy->isPointerTy())
    return;
  unsigned AS = cast<PointerType>(RTy)->getAddressSpace();
  Check(!isSpecialAS(AS) || AS == AddressSpace::Tracked,
        "Only gc tracked values may be directly returned", &RI);
}
void Lint::visitReturnInst(ReturnInst &I) {
  Function *F = I.getParent()->getParent();
  Assert(!F->doesNotReturn(),
         "Unusual: Return statement in function with noreturn attribute", &I);

  if (Value *V = I.getReturnValue()) {
    Value *Obj = findValue(V, /*OffsetOk=*/true);
    Assert(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I);
  }
}
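// Hedged illustration (not part of the pass above): building the exact
// pattern the "Returning alloca value" assert flags -- a function returning
// the address of its own stack slot. Assumes the usual IR headers; the names
// are made up for the example.
LLVMContext Ctx;
Module M("lint-example", Ctx);
IRBuilder<> B(Ctx);
FunctionType *FTy =
    FunctionType::get(B.getInt32Ty()->getPointerTo(), /*isVarArg=*/false);
Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", &M);
B.SetInsertPoint(BasicBlock::Create(Ctx, "entry", F));
Value *Slot = B.CreateAlloca(B.getInt32Ty());  // stack storage, dies with f
B.CreateRet(Slot);  // findValue reaches the alloca; the lint fires on this ret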
void Interpreter::visitReturnInst(ReturnInst &I) {
  ExecutionContext &SF = ECStack.back();
  const Type *RetTy = Type::VoidTy;
  GenericValue Result;

  // Save away the return value... (if we are not 'ret void')
  if (I.getNumOperands()) {
    RetTy  = I.getReturnValue()->getType();
    Result = getOperandValue(I.getReturnValue(), SF);
  }

  popStackAndReturnValueToCaller(RetTy, Result);
}
virtual bool isRegofInstFITarget(Value *reg, Instruction *inst) {
  if (CallInst *CI = dyn_cast<CallInst>(inst)) {
    Function *called_func = CI->getCalledFunction();
    if (called_func == NULL)
      return false;
    return reg == CI; // selects dst register
  } else if (ReturnInst *RI = dyn_cast<ReturnInst>(inst)) {
    return reg == RI->getReturnValue();
  } else {
    return false;
  }
}
bool IRTranslator::translateReturn(const ReturnInst &RI) {
  const Value *Ret = RI.getReturnValue();
  // The target may mess up with the insertion point, but this is not
  // important as a return is the last instruction of the block anyway.
  return CLI->lowerReturn(MIRBuilder, Ret, !Ret ? 0 : getOrCreateVReg(*Ret));
}
/// Check to see if the function containing the specified tail call consistently
/// returns the same runtime-constant value at all exit points except for
/// IgnoreRI.  If so, return the returned value.
static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
  Function *F = CI->getParent()->getParent();
  Value *ReturnedValue = nullptr;

  for (BasicBlock &BBI : *F) {
    ReturnInst *RI = dyn_cast<ReturnInst>(BBI.getTerminator());
    if (RI == nullptr || RI == IgnoreRI)
      continue;

    // We can only perform this transformation if the value returned is
    // evaluatable at the start of the initial invocation of the function,
    // instead of at the end of the evaluation.
    //
    Value *RetOp = RI->getOperand(0);
    if (!isDynamicConstant(RetOp, CI, RI))
      return nullptr;

    if (ReturnedValue && RetOp != ReturnedValue)
      return nullptr;  // Cannot transform if differing values are returned.
    ReturnedValue = RetOp;
  }
  return ReturnedValue;
}
// getCommonReturnValue - Check to see if the function containing the specified
// tail call consistently returns the same runtime-constant value at all exit
// points except for IgnoreRI.  If so, return the returned value.
//
static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
  Function *F = CI->getParent()->getParent();
  Value *ReturnedValue = 0;

  for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) {
    ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator());
    if (RI == 0 || RI == IgnoreRI) continue;

    // We can only perform this transformation if the value returned is
    // evaluatable at the start of the initial invocation of the function,
    // instead of at the end of the evaluation.
    //
    Value *RetOp = RI->getOperand(0);
    if (!isDynamicConstant(RetOp, CI, RI))
      return 0;

    if (ReturnedValue && RetOp != ReturnedValue)
      return 0;     // Cannot transform if differing values are returned.
    ReturnedValue = RetOp;
  }
  return ReturnedValue;
}
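// Context sketch for the two revisions above (the second predates range-for
// and nullptr): this helper supports tail-recursion elimination, which may
// only rewrite the recursive return if every *other* exit yields one common,
// dynamically constant value. A hedged IR example of the accepted shape:
//
//   exit1:
//     ret i32 %init          ; same value...
//   exit2:
//     ret i32 %init          ; ...at every other exit
//   recurse:
//     %r = call i32 @f(...)  ; the tail call CI
//     ret i32 %r             ; IgnoreRI, skipped by the scan
//
// Here getCommonReturnValue would yield %init, provided isDynamicConstant
// proves it evaluatable on entry to the function.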
void LowerEmAsyncify::transformAsyncFunction(Function &F, Instructions const& AsyncCalls) {
  assert(!AsyncCalls.empty());

  // Pass 0
  // collect all the return instructions from the original function
  // will use later
  std::vector<ReturnInst*> OrigReturns;
  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
    if (ReturnInst *RI = dyn_cast<ReturnInst>(&*I)) {
      OrigReturns.push_back(RI);
    }
  }

  // Pass 1
  // Scan each async call and make the basic structure:
  // All these will be cloned into the callback functions
  // - allocate the async context before calling an async function
  // - check async right after calling an async function, save context & return if async, continue if not
  // - retrieve the async return value and free the async context if the called function turns out to be sync
  std::vector<AsyncCallEntry> AsyncCallEntries;
  AsyncCallEntries.reserve(AsyncCalls.size());
  for (Instructions::const_iterator I = AsyncCalls.begin(), E = AsyncCalls.end(); I != E; ++I) {
    // prepare blocks
    Instruction *CurAsyncCall = *I;

    // The block containing the async call
    BasicBlock *CurBlock = CurAsyncCall->getParent();
    // The block that should run after the async call
    BasicBlock *AfterCallBlock = SplitBlock(CurBlock, CurAsyncCall->getNextNode());
    // The block where we store the context and return
    BasicBlock *SaveAsyncCtxBlock = BasicBlock::Create(TheModule->getContext(), "SaveAsyncCtx", &F, AfterCallBlock);
    // return a dummy value at the end, to make the block valid
    new UnreachableInst(TheModule->getContext(), SaveAsyncCtxBlock);

    // allocate the context before making the call
    // we don't know the size yet, will fix it later
    // we cannot insert the instruction later because we need to make sure that
    // all the instructions and blocks are fixed before we can generate the DT
    // and find context variables
    // In CallHandler.h `sp` will be put as the second parameter,
    // such that we can take a note of the original sp
    CallInst *AllocAsyncCtxInst = CallInst::Create(AllocAsyncCtxFunction, Constant::getNullValue(I32), "AsyncCtx", CurAsyncCall);

    // Right after the call
    // check async and return if so
    // TODO: we can define truly async functions and partial async functions
    {
      // remove old terminator, which came from SplitBlock
      CurBlock->getTerminator()->eraseFromParent();
      // go to SaveAsyncCtxBlock if the previous call is async
      // otherwise just continue to AfterCallBlock
      CallInst *CheckAsync = CallInst::Create(CheckAsyncFunction, "IsAsync", CurBlock);
      BranchInst::Create(SaveAsyncCtxBlock, AfterCallBlock, CheckAsync, CurBlock);
    }

    // take a note of this async call
    AsyncCallEntry CurAsyncCallEntry;
    CurAsyncCallEntry.AsyncCallInst = CurAsyncCall;
    CurAsyncCallEntry.AfterCallBlock = AfterCallBlock;
    CurAsyncCallEntry.AllocAsyncCtxInst = AllocAsyncCtxInst;
    CurAsyncCallEntry.SaveAsyncCtxBlock = SaveAsyncCtxBlock;
    // create an empty function for the callback, which will be constructed later
    CurAsyncCallEntry.CallbackFunc = Function::Create(CallbackFunctionType, F.getLinkage(), F.getName() + "__async_cb", TheModule);
    AsyncCallEntries.push_back(CurAsyncCallEntry);
  }

  // Pass 2
  // analyze the context variables and construct SaveAsyncCtxBlock for each async call
  // also calculate the size of the context and allocate the async context accordingly
  for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) {
    AsyncCallEntry & CurEntry = *EI;

    // Collect everything to be saved
    FindContextVariables(CurEntry);

    // Pack the variables as a struct
    {
      // TODO: sort them from large members to small ones, in order to make the
      // struct compact even when aligned
      SmallVector<Type*, 8> Types;
      Types.push_back(CallbackFunctionType->getPointerTo());
      for (Values::iterator VI = CurEntry.ContextVariables.begin(), VE = CurEntry.ContextVariables.end(); VI != VE; ++VI) {
        Types.push_back((*VI)->getType());
      }
      CurEntry.ContextStructType = StructType::get(TheModule->getContext(), Types);
    }

    // fix the size of allocation
    CurEntry.AllocAsyncCtxInst->setOperand(0, ConstantInt::get(I32, DL->getTypeStoreSize(CurEntry.ContextStructType)));

    // construct SaveAsyncCtxBlock
    {
      // fill in SaveAsyncCtxBlock
      // temporarily remove the terminator for convenience
      CurEntry.SaveAsyncCtxBlock->getTerminator()->eraseFromParent();
      assert(CurEntry.SaveAsyncCtxBlock->empty());

      Type *AsyncCtxAddrTy = CurEntry.ContextStructType->getPointerTo();
      BitCastInst *AsyncCtxAddr = new BitCastInst(CurEntry.AllocAsyncCtxInst, AsyncCtxAddrTy, "AsyncCtxAddr", CurEntry.SaveAsyncCtxBlock);
      SmallVector<Value*, 2> Indices;
      // store the callback
      {
        Indices.push_back(ConstantInt::get(I32, 0));
        Indices.push_back(ConstantInt::get(I32, 0));
        GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(AsyncCtxAddrTy, AsyncCtxAddr, Indices, "", CurEntry.SaveAsyncCtxBlock);
        new StoreInst(CurEntry.CallbackFunc, AsyncVarAddr, CurEntry.SaveAsyncCtxBlock);
      }

      // store the context variables
      for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) {
        Indices.clear();
        Indices.push_back(ConstantInt::get(I32, 0));
        Indices.push_back(ConstantInt::get(I32, i + 1)); // the 0th element is the callback function
        GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(AsyncCtxAddrTy, AsyncCtxAddr, Indices, "", CurEntry.SaveAsyncCtxBlock);
        new StoreInst(CurEntry.ContextVariables[i], AsyncVarAddr, CurEntry.SaveAsyncCtxBlock);
      }

      // to exit the block, we want to return without unwinding the stack frame
      CallInst::Create(DoNotUnwindFunction, "", CurEntry.SaveAsyncCtxBlock);
      ReturnInst::Create(TheModule->getContext(),
                         (F.getReturnType()->isVoidTy() ? 0 : Constant::getNullValue(F.getReturnType())),
                         CurEntry.SaveAsyncCtxBlock);
    }
  }

  // Pass 3
  // now all the SaveAsyncCtxBlock's have been constructed
  // we can clone F and construct callback functions
  // we could not construct the callbacks in Pass 2 because we need _all_ those
  // SaveAsyncCtxBlock's to appear in _each_ callback
  for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) {
    AsyncCallEntry & CurEntry = *EI;

    Function *CurCallbackFunc = CurEntry.CallbackFunc;
    ValueToValueMapTy VMap;

    // Add the entry block
    // load variables from the context
    // also update VMap for CloneFunction
    BasicBlock *EntryBlock = BasicBlock::Create(TheModule->getContext(), "AsyncCallbackEntry", CurCallbackFunc);
    std::vector<LoadInst *> LoadedAsyncVars;
    {
      Type *AsyncCtxAddrTy = CurEntry.ContextStructType->getPointerTo();
      BitCastInst *AsyncCtxAddr = new BitCastInst(CurCallbackFunc->arg_begin(), AsyncCtxAddrTy, "AsyncCtx", EntryBlock);
      SmallVector<Value*, 2> Indices;
      for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) {
        Indices.clear();
        Indices.push_back(ConstantInt::get(I32, 0));
        Indices.push_back(ConstantInt::get(I32, i + 1)); // the 0th element of AsyncCtx is the callback function
        GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(AsyncCtxAddrTy, AsyncCtxAddr, Indices, "", EntryBlock);
        LoadedAsyncVars.push_back(new LoadInst(AsyncVarAddr, "", EntryBlock));
        // we want the argument to be replaced by the loaded value
        if (isa<Argument>(CurEntry.ContextVariables[i]))
          VMap[CurEntry.ContextVariables[i]] = LoadedAsyncVars.back();
      }
    }

    // we don't need any argument, just leave dummy entries there to cheat CloneFunctionInto
    for (Function::const_arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) {
      if (VMap.count(AI) == 0)
        VMap[AI] = Constant::getNullValue(AI->getType());
    }

    // Clone the function
    {
      SmallVector<ReturnInst*, 8> Returns;
      CloneFunctionInto(CurCallbackFunc, &F, VMap, false, Returns);

      // return type of the callback functions is always void
      // need to fix the return type
      if (!F.getReturnType()->isVoidTy()) {
        // for those return instructions that are from the original function
        // it means we are 'truly' leaving this function
        // need to store the return value right before return
        for (size_t i = 0; i < OrigReturns.size(); ++i) {
          ReturnInst *RI = cast<ReturnInst>(VMap[OrigReturns[i]]);
          // Need to store the return value into the global area
          CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", RI);
          BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, F.getReturnType()->getPointerTo(), "AsyncRetValAddr", RI);
          new StoreInst(RI->getOperand(0), RetValAddr, RI);
        }
        // we want to unwind the stack back to where it was before the original
        // function was called, but we don't actually need to do this here
        // at this point it must be true that no callback is pending
        // so the scheduler will correct the stack pointer and pop the frame
        // here we just fix the return type
        for (size_t i = 0; i < Returns.size(); ++i) {
          ReplaceInstWithInst(Returns[i], ReturnInst::Create(TheModule->getContext()));
        }
      }
    }

    // the callback function does not have any return value
    // so clear all the attributes for return
    {
      AttributeSet Attrs = CurCallbackFunc->getAttributes();
      CurCallbackFunc->setAttributes(
        Attrs.removeAttributes(TheModule->getContext(), AttributeSet::ReturnIndex, Attrs.getRetAttributes())
      );
    }

    // in the callback function, we never allocate a new async frame
    // instead we reuse the existing one
    for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) {
      Instruction *I = cast<Instruction>(VMap[EI->AllocAsyncCtxInst]);
      ReplaceInstWithInst(I, CallInst::Create(ReallocAsyncCtxFunction, I->getOperand(0), "ReallocAsyncCtx"));
    }

    // mapped entry point & async call
    BasicBlock *ResumeBlock = cast<BasicBlock>(VMap[CurEntry.AfterCallBlock]);
    Instruction *MappedAsyncCall = cast<Instruction>(VMap[CurEntry.AsyncCallInst]);

    // To save space, for each async call in the callback function, we just
    // ignore the sync case, and leave it to the scheduler
    // TODO need an option for this
    {
      for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) {
        AsyncCallEntry & CurEntry = *EI;
        Instruction *MappedAsyncCallInst = cast<Instruction>(VMap[CurEntry.AsyncCallInst]);
        BasicBlock *MappedAsyncCallBlock = MappedAsyncCallInst->getParent();
        BasicBlock *MappedAfterCallBlock = cast<BasicBlock>(VMap[CurEntry.AfterCallBlock]);

        // for the sync case of the call, go to NewBlock (instead of MappedAfterCallBlock)
        BasicBlock *NewBlock = BasicBlock::Create(TheModule->getContext(), "", CurCallbackFunc, MappedAfterCallBlock);
        MappedAsyncCallBlock->getTerminator()->setSuccessor(1, NewBlock);
        // store the return value
        if (!MappedAsyncCallInst->use_empty()) {
          CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", NewBlock);
          BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, MappedAsyncCallInst->getType()->getPointerTo(), "AsyncRetValAddr", NewBlock);
          new StoreInst(MappedAsyncCallInst, RetValAddr, NewBlock);
        }
        // tell the scheduler that we want to keep the current async stack frame
        CallInst::Create(DoNotUnwindAsyncFunction, "", NewBlock);
        // finally we go to the SaveAsyncCtxBlock, to register the callback,
        // save the local variables and leave
        BasicBlock *MappedSaveAsyncCtxBlock = cast<BasicBlock>(VMap[CurEntry.SaveAsyncCtxBlock]);
        BranchInst::Create(MappedSaveAsyncCtxBlock, NewBlock);
      }
    }

    std::vector<AllocaInst*> ToPromote;
    // apply the loaded variables in the entry block
    {
      BasicBlockSet ReachableBlocks = FindReachableBlocksFrom(ResumeBlock);
      for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) {
        Value *OrigVar = CurEntry.ContextVariables[i];
        if (isa<Argument>(OrigVar)) continue; // already processed
        Value *CurVar = VMap[OrigVar];
        assert(CurVar != MappedAsyncCall);
        if (Instruction *Inst = dyn_cast<Instruction>(CurVar)) {
          if (ReachableBlocks.count(Inst->getParent())) {
            // Inst could be either defined or loaded from the async context
            // Do the dirty work in memory
            // TODO: might need to check the safety first
            // TODO: can we create phi directly?
            AllocaInst *Addr = DemoteRegToStack(*Inst, false);
            new StoreInst(LoadedAsyncVars[i], Addr, EntryBlock);
            ToPromote.push_back(Addr);
          } else {
            // The parent block is not reachable, which means there is no
            // conflict; it's safe to replace Inst with the loaded value
            assert(Inst != LoadedAsyncVars[i]); // this should only happen when OrigVar is an Argument
            Inst->replaceAllUsesWith(LoadedAsyncVars[i]);
          }
        }
      }
    }

    // resolve the return value of the previous async function
    // it could be the value just loaded from the global area
    // or directly returned by the function (in its sync case)
    if (!CurEntry.AsyncCallInst->use_empty()) {
      // load the async return value
      CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", EntryBlock);
      BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, MappedAsyncCall->getType()->getPointerTo(), "AsyncRetValAddr", EntryBlock);
      LoadInst *RetVal = new LoadInst(RetValAddr, "AsyncRetVal", EntryBlock);

      AllocaInst *Addr = DemoteRegToStack(*MappedAsyncCall, false);
      new StoreInst(RetVal, Addr, EntryBlock);
      ToPromote.push_back(Addr);
    }

    // TODO remove unreachable blocks before creating phi

    // We go right to ResumeBlock from the EntryBlock
    BranchInst::Create(ResumeBlock, EntryBlock);

    /*
     * Creating phi's
     * Normal stack frames and async stack frames are interleaved with each other.
     * In a callback function, if we call an async function, we might need to
     * realloc the async ctx.
     * At this point we don't want anything stored after the ctx,
     * such that we can free and extend the ctx by simply updating STACKTOP.
     * Therefore we don't want any alloca's in callback functions.
     */
    if (!ToPromote.empty()) {
      DominatorTreeWrapperPass DTW;
      DTW.runOnFunction(*CurCallbackFunc);
      PromoteMemToReg(ToPromote, DTW.getDomTree());
    }

    removeUnreachableBlocks(*CurCallbackFunc);
  }

  // Pass 4
  // Here are modifications to the original function, which we don't want
  // cloned into the callback functions
  for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) {
    AsyncCallEntry & CurEntry = *EI;
    // remove the frame if no async function has been called
    CallInst::Create(FreeAsyncCtxFunction, CurEntry.AllocAsyncCtxInst, "", CurEntry.AfterCallBlock->getFirstNonPHI());
  }
}
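// Hedged sketch of the control flow Pass 1 installs around each async call.
// IR shape only; the callee names below are placeholders for the runtime
// helpers the pass holds in AllocAsyncCtxFunction / CheckAsyncFunction:
//
//   CurBlock:
//     %AsyncCtx = call i8* @alloc_async_ctx(i32 0)   ; size patched in Pass 2
//     call void @async_callee(...)
//     %IsAsync = call i1 @check_async()
//     br i1 %IsAsync, label %SaveAsyncCtx, label %AfterCall
//
//   SaveAsyncCtx:   ; Pass 2 fills this in: store the callback pointer and
//     ...           ; live context variables into %AsyncCtx, tell the
//                   ; scheduler not to unwind, return a dummy value
//
//   AfterCall:      ; reached directly when the callee was sync; in the
//     ...           ; async case the cloned callback resumes here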
void moduleDepGraph::matchParametersAndReturnValues(Function &F) {
  // Only do the matching if F has any use
  if (F.isVarArg() || !F.hasNUsesOrMore(1)) {
    return;
  }

  // Data structure which contains the matches between formal and real parameters
  // First: formal parameter
  // Second: real parameter
  SmallVector<std::pair<GraphNode*, GraphNode*>, 4> Parameters(F.arg_size());

  // Fetch the function arguments (formal parameters) into the data structure
  Function::arg_iterator argptr;
  Function::arg_iterator e;
  unsigned i;

  // Create the PHI nodes for the formal parameters
  for (i = 0, argptr = F.arg_begin(), e = F.arg_end(); argptr != e; ++i, ++argptr) {
    OpNode* argPHI = new OpNode(Instruction::PHI);
    GraphNode* argNode = NULL;
    argNode = depGraph->addInst(argptr);

    if (argNode != NULL)
      depGraph->addEdge(argPHI, argNode);

    Parameters[i].first = argPHI;
  }

  // Check if the function returns a supported value type. If not, no return
  // value matching is done
  bool noReturn = F.getReturnType()->isVoidTy();

  // Creates the data structure which receives the return values of the
  // function, if there are any
  SmallPtrSet<llvm::Value*, 8> ReturnValues;

  if (!noReturn) {
    // Iterate over the basic blocks to fetch all possible return values
    for (Function::iterator bb = F.begin(), bbend = F.end(); bb != bbend; ++bb) {
      // Get the terminator instruction of the basic block and check if it's
      // a return instruction: if it's not, continue to next basic block
      Instruction *terminator = bb->getTerminator();
      ReturnInst *RI = dyn_cast<ReturnInst>(terminator);
      if (!RI)
        continue;

      // Get the return value and insert in the data structure
      ReturnValues.insert(RI->getReturnValue());
    }
  }

  for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) {
    User *U = *UI;

    // Ignore blockaddress uses
    if (isa<BlockAddress>(U))
      continue;

    // Used by a non-instruction, or not the callee of a function, do not
    // match.
    if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
      continue;

    Instruction *caller = cast<Instruction>(U);

    CallSite CS(caller);
    if (!CS.isCallee(UI))
      continue;

    // Iterate over the real parameters and put them in the data structure
    CallSite::arg_iterator AI;
    CallSite::arg_iterator EI;

    for (i = 0, AI = CS.arg_begin(), EI = CS.arg_end(); AI != EI; ++i, ++AI) {
      Parameters[i].second = depGraph->addInst(*AI);
    }

    // Match formal and real parameters
    for (i = 0; i < Parameters.size(); ++i) {
      depGraph->addEdge(Parameters[i].second, Parameters[i].first);
    }

    // Match return values
    if (!noReturn) {
      OpNode* retPHI = new OpNode(Instruction::PHI);
      GraphNode* callerNode = depGraph->addInst(caller);
      depGraph->addEdge(retPHI, callerNode);

      for (SmallPtrSetIterator<llvm::Value*> ri = ReturnValues.begin(), re = ReturnValues.end(); ri != re; ++ri) {
        GraphNode* retNode = depGraph->addInst(*ri);
        depGraph->addEdge(retNode, retPHI);
      }
    }

    // Real parameters are cleaned before moving to the next use (for safety's sake)
    for (i = 0; i < Parameters.size(); ++i)
      Parameters[i].second = NULL;
  }

  depGraph->deleteCallNodes(&F);
}
/// InsertStackProtectors - Insert code into the prologue and epilogue of the
/// function.
///
///  - The prologue code loads and stores the stack guard onto the stack.
///  - The epilogue checks the value stored in the prologue against the original
///    value. It calls __stack_chk_fail if they differ.
bool StackProtector::InsertStackProtectors() {
  bool SupportsSelectionDAGSP =
      EnableSelectionDAGSP && !TM->Options.EnableFastISel;
  AllocaInst *AI = nullptr;       // Place on stack that stores the stack guard.

  for (Function::iterator I = F->begin(), E = F->end(); I != E;) {
    BasicBlock *BB = &*I++;
    ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
    if (!RI)
      continue;

    // Generate prologue instrumentation if not already generated.
    if (!HasPrologue) {
      HasPrologue = true;
      SupportsSelectionDAGSP &= CreatePrologue(F, M, RI, TLI, AI);
    }

    // SelectionDAG based code generation. Nothing else needs to be done here.
    // The epilogue instrumentation is postponed to SelectionDAG.
    if (SupportsSelectionDAGSP)
      break;

    // Set HasIRCheck to true, so that SelectionDAG will not generate its own
    // version. SelectionDAG calls 'shouldEmitSDCheck' to check whether
    // instrumentation has already been generated.
    HasIRCheck = true;

    // Generate epilogue instrumentation. The epilogue instrumentation can be
    // function-based or inlined depending on which mechanism the target is
    // providing.
    if (Value *GuardCheck = TLI->getSSPStackGuardCheck(*M)) {
      // Generate the function-based epilogue instrumentation.
      // The target provides a guard check function, generate a call to it.
      IRBuilder<> B(RI);
      LoadInst *Guard = B.CreateLoad(AI, true, "Guard");
      CallInst *Call = B.CreateCall(GuardCheck, {Guard});
      llvm::Function *Function = cast<llvm::Function>(GuardCheck);
      Call->setAttributes(Function->getAttributes());
      Call->setCallingConv(Function->getCallingConv());
    } else {
      // Generate the epilogue with inline instrumentation.
      // If we do not support SelectionDAG based tail calls, generate IR level
      // tail calls.
      //
      // For each block with a return instruction, convert this:
      //
      //   return:
      //     ...
      //     ret ...
      //
      // into this:
      //
      //   return:
      //     ...
      //     %1 = <stack guard>
      //     %2 = load StackGuardSlot
      //     %3 = cmp i1 %1, %2
      //     br i1 %3, label %SP_return, label %CallStackCheckFailBlk
      //
      //   SP_return:
      //     ret ...
      //
      //   CallStackCheckFailBlk:
      //     call void @__stack_chk_fail()
      //     unreachable
      //
      // Create the FailBB. We duplicate the BB every time since the MI tail
      // merge pass will merge together all of the various BB into one including
      // fail BB generated by the stack protector pseudo instruction.
      BasicBlock *FailBB = CreateFailBB();

      // Split the basic block before the return instruction.
      BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return");

      // Update the dominator tree if we need to.
      if (DT && DT->isReachableFromEntry(BB)) {
        DT->addNewBlock(NewBB, BB);
        DT->addNewBlock(FailBB, BB);
      }

      // Remove default branch instruction to the new BB.
      BB->getTerminator()->eraseFromParent();

      // Move the newly created basic block to the point right after the old
      // basic block so that it's in the "fall through" position.
      NewBB->moveAfter(BB);

      // Generate the stack protector instructions in the old basic block.
      IRBuilder<> B(BB);
      Value *Guard = getStackGuard(TLI, M, B);
      LoadInst *LI2 = B.CreateLoad(AI, true);
      Value *Cmp = B.CreateICmpEQ(Guard, LI2);
      auto SuccessProb =
          BranchProbabilityInfo::getBranchProbStackProtector(true);
      auto FailureProb =
          BranchProbabilityInfo::getBranchProbStackProtector(false);
      MDNode *Weights = MDBuilder(F->getContext())
                            .createBranchWeights(SuccessProb.getNumerator(),
                                                 FailureProb.getNumerator());
      B.CreateCondBr(Cmp, NewBB, FailBB, Weights);
    }
  }

  // Return if we didn't modify any basic blocks. i.e., there are no return
  // statements in the function.
  return HasPrologue;
}
// InlineFunction - This function inlines the called function into the basic
// block of the caller.  This returns false if it is not possible to inline
// this call.  The program is still in a well defined state if this occurs
// though.
//
// Note that this only does one level of inlining.  For example, if the
// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
// exists in the instruction stream.  Similarly this will inline a recursive
// function by one level.
//
bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) {
  Instruction *TheCall = CS.getInstruction();
  assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
         "Instruction not in function!");

  const Function *CalledFunc = CS.getCalledFunction();
  if (CalledFunc == 0 ||          // Can't inline external function or indirect
      CalledFunc->isDeclaration() || // call, or call to a vararg function!
      CalledFunc->getFunctionType()->isVarArg()) return false;

  // If the call to the callee is not a tail call, we must clear the 'tail'
  // flags on any calls that we inline.
  bool MustClearTailCallFlags =
    !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());

  // If the call to the callee cannot throw, set the 'nounwind' flag on any
  // calls that we inline.
  bool MarkNoUnwind = CS.doesNotThrow();

  BasicBlock *OrigBB = TheCall->getParent();
  Function *Caller = OrigBB->getParent();

  // GC poses two hazards to inlining, which only occur when the callee has GC:
  //  1. If the caller has no GC, then the callee's GC must be propagated to the
  //     caller.
  //  2. If the caller has a differing GC, it is invalid to inline.
  if (CalledFunc->hasGC()) {
    if (!Caller->hasGC())
      Caller->setGC(CalledFunc->getGC());
    else if (CalledFunc->getGC() != Caller->getGC())
      return false;
  }

  // Get an iterator to the last basic block in the function, which will have
  // the new function inlined after it.
  //
  Function::iterator LastBlock = &Caller->back();

  // Make sure to capture all of the return instructions from the cloned
  // function.
  std::vector<ReturnInst*> Returns;
  ClonedCodeInfo InlinedFunctionInfo;
  Function::iterator FirstNewBlock;

  { // Scope to destroy ValueMap after cloning.
    DenseMap<const Value*, Value*> ValueMap;

    assert(CalledFunc->arg_size() == CS.arg_size() &&
           "No varargs calls can be inlined!");

    // Calculate the vector of arguments to pass into the function cloner, which
    // matches up the formal to the actual argument values.
    CallSite::arg_iterator AI = CS.arg_begin();
    unsigned ArgNo = 0;
    for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
         E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
      Value *ActualArg = *AI;

      // When byval arguments are actually inlined, we need to make the copy
      // implied by them explicit.  However, we don't do this if the callee is
      // readonly or readnone, because the copy would be unneeded: the callee
      // doesn't modify the struct.
      if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal) &&
          !CalledFunc->onlyReadsMemory()) {
        const Type *AggTy = cast<PointerType>(I->getType())->getElementType();
        const Type *VoidPtrTy = PointerType::getUnqual(Type::Int8Ty);

        // Create the alloca.  If we have TargetData, use nice alignment.
        unsigned Align = 1;
        if (TD) Align = TD->getPrefTypeAlignment(AggTy);
        Value *NewAlloca = new AllocaInst(AggTy, 0, Align, I->getName(),
                                          Caller->begin()->begin());
        // Emit a memcpy.
        const Type *Tys[] = { Type::Int64Ty };
        Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
                                                       Intrinsic::memcpy,
                                                       Tys, 1);
        Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
        Value *SrcCast = new BitCastInst(*AI, VoidPtrTy, "tmp", TheCall);

        Value *Size;
        if (TD == 0)
          Size = ConstantExpr::getSizeOf(AggTy);
        else
          Size = ConstantInt::get(Type::Int64Ty, TD->getTypeStoreSize(AggTy));

        // Always generate a memcpy of alignment 1 here because we don't know
        // the alignment of the src pointer.  Other optimizations can infer
        // better alignment.
        Value *CallArgs[] = {
          DestCast, SrcCast, Size, ConstantInt::get(Type::Int32Ty, 1)
        };
        CallInst *TheMemCpy =
          CallInst::Create(MemCpyFn, CallArgs, CallArgs+4, "", TheCall);

        // If we have a call graph, update it.
        if (CG) {
          CallGraphNode *MemCpyCGN = CG->getOrInsertFunction(MemCpyFn);
          CallGraphNode *CallerNode = (*CG)[Caller];
          CallerNode->addCalledFunction(TheMemCpy, MemCpyCGN);
        }

        // Uses of the argument in the function should use our new alloca
        // instead.
        ActualArg = NewAlloca;
      }

      ValueMap[I] = ActualArg;
    }

    // We want the inliner to prune the code as it copies.  We would LOVE to
    // have no dead or constant instructions leftover after inlining occurs
    // (which can happen, e.g., because an argument was constant), but we'll be
    // happy with whatever the cloner can do.
    CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i",
                              &InlinedFunctionInfo, TD);

    // Remember the first block that is newly cloned over.
    FirstNewBlock = LastBlock; ++FirstNewBlock;

    // Update the callgraph if requested.
    if (CG)
      UpdateCallGraphAfterInlining(CS, FirstNewBlock, ValueMap, *CG);
  }

  // If there are any alloca instructions in the block that used to be the entry
  // block for the callee, move them to the entry block of the caller.  First
  // calculate which instruction they should be inserted before.  We insert the
  // instructions at the end of the current alloca list.
  //
  {
    BasicBlock::iterator InsertPoint = Caller->begin()->begin();
    for (BasicBlock::iterator I = FirstNewBlock->begin(),
         E = FirstNewBlock->end(); I != E; )
      if (AllocaInst *AI = dyn_cast<AllocaInst>(I++)) {
        // If the alloca is now dead, remove it.  This often occurs due to code
        // specialization.
        if (AI->use_empty()) {
          AI->eraseFromParent();
          continue;
        }

        if (isa<Constant>(AI->getArraySize())) {
          // Scan for the block of allocas that we can move over, and move them
          // all at once.
          while (isa<AllocaInst>(I) &&
                 isa<Constant>(cast<AllocaInst>(I)->getArraySize()))
            ++I;

          // Transfer all of the allocas over in a block.  Using splice means
          // that the instructions aren't removed from the symbol table, then
          // reinserted.
          Caller->getEntryBlock().getInstList().splice(
              InsertPoint, FirstNewBlock->getInstList(), AI, I);
        }
      }
  }

  // If the inlined code contained dynamic alloca instructions, wrap the inlined
  // code with llvm.stacksave/llvm.stackrestore intrinsics.
  if (InlinedFunctionInfo.ContainsDynamicAllocas) {
    Module *M = Caller->getParent();
    // Get the two intrinsics we care about.
    Constant *StackSave, *StackRestore;
    StackSave    = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
    StackRestore = Intrinsic::getDeclaration(M, Intrinsic::stackrestore);

    // If we are preserving the callgraph, add edges to the stacksave/restore
    // functions for the calls we insert.
    CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0;
    if (CG) {
      // We know that StackSave/StackRestore are Function*'s, because they are
      // intrinsics which must have the right types.
      StackSaveCGN    = CG->getOrInsertFunction(cast<Function>(StackSave));
      StackRestoreCGN = CG->getOrInsertFunction(cast<Function>(StackRestore));
      CallerNode = (*CG)[Caller];
    }

    // Insert the llvm.stacksave.
    CallInst *SavedPtr = CallInst::Create(StackSave, "savedstack",
                                          FirstNewBlock->begin());
    if (CG) CallerNode->addCalledFunction(SavedPtr, StackSaveCGN);

    // Insert a call to llvm.stackrestore before any return instructions in the
    // inlined function.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      CallInst *CI = CallInst::Create(StackRestore, SavedPtr, "", Returns[i]);
      if (CG) CallerNode->addCalledFunction(CI, StackRestoreCGN);
    }

    // Count the number of StackRestore calls we insert.
    unsigned NumStackRestores = Returns.size();

    // If we are inlining an invoke instruction, insert restores before each
    // unwind.  These unwinds will be rewritten into branches later.
    if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) {
      for (Function::iterator BB = FirstNewBlock, E = Caller->end();
           BB != E; ++BB)
        if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
          CallInst::Create(StackRestore, SavedPtr, "", UI);
          ++NumStackRestores;
        }
    }
  }

  // If we are inlining tail call instruction through a call site that isn't
  // marked 'tail', we must remove the tail marker for any calls in the inlined
  // code.  Also, calls inlined through a 'nounwind' call site should be marked
  // 'nounwind'.
  if (InlinedFunctionInfo.ContainsCalls &&
      (MustClearTailCallFlags || MarkNoUnwind)) {
    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
         BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
        if (CallInst *CI = dyn_cast<CallInst>(I)) {
          if (MustClearTailCallFlags)
            CI->setTailCall(false);
          if (MarkNoUnwind)
            CI->setDoesNotThrow();
        }
  }

  // If we are inlining through a 'nounwind' call site then any inlined 'unwind'
  // instructions are unreachable.
  if (InlinedFunctionInfo.ContainsUnwinds && MarkNoUnwind)
    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
         BB != E; ++BB) {
      TerminatorInst *Term = BB->getTerminator();
      if (isa<UnwindInst>(Term)) {
        new UnreachableInst(Term);
        BB->getInstList().erase(Term);
      }
    }

  // If we are inlining for an invoke instruction, we must make sure to rewrite
  // any inlined 'unwind' instructions into branches to the invoke exception
  // destination, and call instructions into invoke instructions.
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);

  // If we cloned in _exactly one_ basic block, and if that block ends in a
  // return instruction, we splice the body of the inlined callee directly into
  // the calling basic block.
  if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
    // Move all of the instructions right before the call.
    OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
                                 FirstNewBlock->begin(), FirstNewBlock->end());
    // Remove the cloned basic block.
    Caller->getBasicBlockList().pop_back();

    // If the call site was an invoke instruction, add a branch to the normal
    // destination.
    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
      BranchInst::Create(II->getNormalDest(), TheCall);

    // If the return instruction returned a value, replace uses of the call with
    // uses of the returned value.
    if (!TheCall->use_empty()) {
      ReturnInst *R = Returns[0];
      TheCall->replaceAllUsesWith(R->getReturnValue());
    }
    // Since we are now done with the Call/Invoke, we can delete it.
    TheCall->eraseFromParent();

    // Since we are now done with the return instruction, delete it also.
    Returns[0]->eraseFromParent();

    // We are now done with the inlining.
    return true;
  }

  // Otherwise, we have the normal case, of more than one block to inline or
  // multiple return sites.

  // We want to clone the entire callee function into the hole between the
  // "starter" and "ender" blocks.  How we accomplish this depends on whether
  // this is an invoke instruction or a call instruction.
  BasicBlock *AfterCallBB;
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
    // Add an unconditional branch to make this look like the CallInst case...
    BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);

    // Split the basic block.  This guarantees that no PHI nodes will have to be
    // updated due to new incoming edges, and make the invoke case more
    // symmetric to the call case.
    AfterCallBB = OrigBB->splitBasicBlock(NewBr,
                                          CalledFunc->getName()+".exit");
  } else {  // It's a call
    // If this is a call instruction, we need to split the basic block that
    // the call lives in.
    //
    AfterCallBB = OrigBB->splitBasicBlock(TheCall,
                                          CalledFunc->getName()+".exit");
  }

  // Change the branch that used to go to AfterCallBB to branch to the first
  // basic block of the inlined function.
  //
  TerminatorInst *Br = OrigBB->getTerminator();
  assert(Br && Br->getOpcode() == Instruction::Br &&
         "splitBasicBlock broken!");
  Br->setOperand(0, FirstNewBlock);

  // Now that the function is correct, make it a little bit nicer.  In
  // particular, move the basic blocks inserted from the end of the function
  // into the space made by splitting the source basic block.
  Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
                                     FirstNewBlock, Caller->end());

  // Handle all of the return instructions that we just cloned in, and eliminate
  // any users of the original call/invoke instruction.
  const Type *RTy = CalledFunc->getReturnType();

  if (Returns.size() > 1) {
    // The PHI node should go at the front of the new basic block to merge all
    // possible incoming values.
    PHINode *PHI = 0;
    if (!TheCall->use_empty()) {
      PHI = PHINode::Create(RTy, TheCall->getName(),
                            AfterCallBB->begin());
      // Anything that used the result of the function call should now use the
      // PHI node as their operand.
      TheCall->replaceAllUsesWith(PHI);
    }

    // Loop over all of the return instructions adding entries to the PHI node
    // as appropriate.
    if (PHI) {
      for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
        ReturnInst *RI = Returns[i];
        assert(RI->getReturnValue()->getType() == PHI->getType() &&
               "Ret value not consistent in function!");
        PHI->addIncoming(RI->getReturnValue(), RI->getParent());
      }
    }

    // Add a branch to the merge points and remove return instructions.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      ReturnInst *RI = Returns[i];
      BranchInst::Create(AfterCallBB, RI);
      RI->eraseFromParent();
    }
  } else if (!Returns.empty()) {
    // Otherwise, if there is exactly one return value, just replace anything
    // using the return value of the call with the computed value.
    if (!TheCall->use_empty())
      TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());

    // Splice the code from the return block into the block that it will return
    // to, which contains the code that was after the call.
    BasicBlock *ReturnBB = Returns[0]->getParent();
    AfterCallBB->getInstList().splice(AfterCallBB->begin(),
                                      ReturnBB->getInstList());

    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
    ReturnBB->replaceAllUsesWith(AfterCallBB);

    // Delete the return instruction now and empty ReturnBB now.
    Returns[0]->eraseFromParent();
    ReturnBB->eraseFromParent();
  } else if (!TheCall->use_empty()) {
    // No returns, but something is using the return value of the call.  Just
    // nuke the result.
    TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
  }

  // Since we are now done with the Call/Invoke, we can delete it.
  TheCall->eraseFromParent();

  // We should always be able to fold the entry block of the function into the
  // single predecessor of the block...
  assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
  BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);

  // Splice the code entry block into calling block, right before the
  // unconditional branch.
  OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
  CalleeEntry->replaceAllUsesWith(OrigBB);  // Update PHI nodes

  // Remove the unconditional branch.
  OrigBB->getInstList().erase(Br);

  // Now we can remove the CalleeEntry block, which is now empty.
  Caller->getBasicBlockList().erase(CalleeEntry);

  return true;
}
/// mergeEmptyReturnBlocks - If we have more than one empty (other than phi
/// node) return blocks, merge them together to promote recursive block merging.
static bool mergeEmptyReturnBlocks(Function &F) {
  bool Changed = false;

  BasicBlock *RetBlock = 0;

  // Scan all the blocks in the function, looking for empty return blocks.
  for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) {
    BasicBlock &BB = *BBI++;

    // Only look at return blocks.
    ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
    if (Ret == 0) continue;

    // Only look at the block if it is empty or the only other thing in it is a
    // single PHI node that is the operand to the return.
    if (Ret != &BB.front()) {
      // Check for something else in the block.
      BasicBlock::iterator I = Ret;
      --I;
      // Skip over debug info.
      while (isa<DbgInfoIntrinsic>(I) && I != BB.begin())
        --I;
      if (!isa<DbgInfoIntrinsic>(I) &&
          (!isa<PHINode>(I) || I != BB.begin() ||
           Ret->getNumOperands() == 0 ||
           Ret->getOperand(0) != I))
        continue;
    }

    // If this is the first returning block, remember it and keep going.
    if (RetBlock == 0) {
      RetBlock = &BB;
      continue;
    }

    // Otherwise, we found a duplicate return block.  Merge the two.
    Changed = true;

    // Case when there is no input to the return or when the returned values
    // agree is trivial.  Note that they can't agree if there are phis in the
    // blocks.
    if (Ret->getNumOperands() == 0 ||
        Ret->getOperand(0) ==
          cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0)) {
      BB.replaceAllUsesWith(RetBlock);
      BB.eraseFromParent();
      continue;
    }

    // If the canonical return block has no PHI node, create one now.
    PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin());
    if (RetBlockPHI == 0) {
      Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0);
      pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock);
      RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(),
                                    std::distance(PB, PE), "merge",
                                    &RetBlock->front());

      for (pred_iterator PI = PB; PI != PE; ++PI)
        RetBlockPHI->addIncoming(InVal, *PI);
      RetBlock->getTerminator()->setOperand(0, RetBlockPHI);
    }

    // Turn BB into a block that just unconditionally branches to the return
    // block.  This handles the case when the two return blocks have a common
    // predecessor but that return different things.
    RetBlockPHI->addIncoming(Ret->getOperand(0), &BB);
    BB.getTerminator()->eraseFromParent();
    BranchInst::Create(RetBlock, &BB);
  }

  return Changed;
}
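// Hedged illustration of the transform above (IR shape only, not from a
// specific test case). Two return blocks returning different values:
//
//   ret1:                 ; becomes the canonical return block
//     ret i32 %a
//   ret2:
//     ret i32 %b
//
// are merged so that ret2 branches into ret1, with a phi selecting the value:
//
//   ret1:
//     %merge = phi i32 [ %a, ... ], [ %b, %ret2 ]
//     ret i32 %merge
//   ret2:
//     br label %ret1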
/// InsertStackProtectors - Insert code into the prologue and epilogue of the
/// function.
///
///  - The prologue code loads and stores the stack guard onto the stack.
///  - The epilogue checks the value stored in the prologue against the original
///    value. It calls __stack_chk_fail if they differ.
bool StackProtector::InsertStackProtectors() {
  BasicBlock *FailBB = 0;       // The basic block to jump to if check fails.
  BasicBlock *FailBBDom = 0;    // FailBB's dominator.
  AllocaInst *AI = 0;           // Place on stack that stores the stack guard.
  Value *StackGuardVar = 0;     // The stack guard variable.

  for (Function::iterator I = F->begin(), E = F->end(); I != E; ) {
    BasicBlock *BB = I++;
    ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
    if (!RI) continue;

    if (!FailBB) {
      // Insert code into the entry block that stores the __stack_chk_guard
      // variable onto the stack:
      //
      //   entry:
      //     StackGuardSlot = alloca i8*
      //     StackGuard = load __stack_chk_guard
      //     call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
      //
      const PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
      unsigned AddressSpace, Offset;
      if (TLI->getStackCookieLocation(AddressSpace, Offset)) {
        Constant *OffsetVal =
          ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset);
        StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal,
                                      PointerType::get(PtrTy, AddressSpace));
      } else {
        StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
      }

      BasicBlock &Entry = F->getEntryBlock();
      Instruction *InsPt = &Entry.front();

      AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt);
      LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt);

      Value *Args[] = { LI, AI };
      CallInst::
        Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector),
               &Args[0], array_endof(Args), "", InsPt);

      // Create the basic block to jump to when the guard check fails.
      FailBB = CreateFailBB();
      if (DT)
        FailBBDom = DT->isReachableFromEntry(BB) ? BB : 0;
    }

    // For each block with a return instruction, convert this:
    //
    //   return:
    //     ...
    //     ret ...
    //
    // into this:
    //
    //   return:
    //     ...
    //     %1 = load __stack_chk_guard
    //     %2 = load StackGuardSlot
    //     %3 = cmp i1 %1, %2
    //     br i1 %3, label %SP_return, label %CallStackCheckFailBlk
    //
    //   SP_return:
    //     ret ...
    //
    //   CallStackCheckFailBlk:
    //     call void @__stack_chk_fail()
    //     unreachable

    // Split the basic block before the return instruction.
    BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return");

    if (DT) {
      DT->addNewBlock(NewBB, DT->isReachableFromEntry(BB) ? BB : 0);
      FailBBDom = DT->findNearestCommonDominator(FailBBDom, BB);
    }

    // Remove default branch instruction to the new BB.
    BB->getTerminator()->eraseFromParent();

    // Move the newly created basic block to the point right after the old
    // basic block so that it's in the "fall through" position.
    NewBB->moveAfter(BB);

    // Generate the stack protector instructions in the old basic block.
    LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB);
    LoadInst *LI2 = new LoadInst(AI, "", true, BB);
    ICmpInst *Cmp = new ICmpInst(*BB, CmpInst::ICMP_EQ, LI1, LI2, "");
    BranchInst::Create(NewBB, FailBB, Cmp, BB);
  }

  // Return if we didn't modify any basic blocks. I.e., there are no return
  // statements in the function.
  if (!FailBB) return false;

  if (DT)
    DT->addNewBlock(FailBB, FailBBDom);

  return true;
}
void GraphBuilder::visitReturnInst(ReturnInst &RI) {
  if (RI.getNumOperands() && isa<PointerType>(RI.getOperand(0)->getType()))
    G.getOrCreateReturnNodeFor(*FB).mergeWith(getValueDest(RI.getOperand(0)));
}
/// InlineFunction - This function inlines the called function into the basic /// block of the caller. This returns false if it is not possible to inline /// this call. The program is still in a well defined state if this occurs /// though. /// /// Note that this only does one level of inlining. For example, if the /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now /// exists in the instruction stream. Similarly this will inline a recursive /// function by one level. bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, bool InsertLifetime) { Instruction *TheCall = CS.getInstruction(); assert(TheCall->getParent() && TheCall->getParent()->getParent() && "Instruction not in function!"); // If IFI has any state in it, zap it before we fill it in. IFI.reset(); const Function *CalledFunc = CS.getCalledFunction(); if (CalledFunc == 0 || // Can't inline external function or indirect CalledFunc->isDeclaration() || // call, or call to a vararg function! CalledFunc->getFunctionType()->isVarArg()) return false; // If the call to the callee is not a tail call, we must clear the 'tail' // flags on any calls that we inline. bool MustClearTailCallFlags = !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall()); // If the call to the callee cannot throw, set the 'nounwind' flag on any // calls that we inline. bool MarkNoUnwind = CS.doesNotThrow(); BasicBlock *OrigBB = TheCall->getParent(); Function *Caller = OrigBB->getParent(); // GC poses two hazards to inlining, which only occur when the callee has GC: // 1. If the caller has no GC, then the callee's GC must be propagated to the // caller. // 2. If the caller has a differing GC, it is invalid to inline. if (CalledFunc->hasGC()) { if (!Caller->hasGC()) Caller->setGC(CalledFunc->getGC()); else if (CalledFunc->getGC() != Caller->getGC()) return false; } // Get the personality function from the callee if it contains a landing pad. Value *CalleePersonality = 0; for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I) if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { const BasicBlock *BB = II->getUnwindDest(); const LandingPadInst *LP = BB->getLandingPadInst(); CalleePersonality = LP->getPersonalityFn(); break; } // Find the personality function used by the landing pads of the caller. If it // exists, then check to see that it matches the personality function used in // the callee. if (CalleePersonality) { for (Function::const_iterator I = Caller->begin(), E = Caller->end(); I != E; ++I) if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) { const BasicBlock *BB = II->getUnwindDest(); const LandingPadInst *LP = BB->getLandingPadInst(); // If the personality functions match, then we can perform the // inlining. Otherwise, we can't inline. // TODO: This isn't 100% true. Some personality functions are proper // supersets of others and can be used in place of the other. if (LP->getPersonalityFn() != CalleePersonality) return false; break; } } // Get an iterator to the last basic block in the function, which will have // the new function inlined after it. Function::iterator LastBlock = &Caller->back(); // Make sure to capture all of the return instructions from the cloned // function. SmallVector<ReturnInst*, 8> Returns; ClonedCodeInfo InlinedFunctionInfo; Function::iterator FirstNewBlock; { // Scope to destroy VMap after cloning. 
ValueToValueMapTy VMap; assert(CalledFunc->arg_size() == CS.arg_size() && "No varargs calls can be inlined!"); // Calculate the vector of arguments to pass into the function cloner, which // matches up the formal to the actual argument values. CallSite::arg_iterator AI = CS.arg_begin(); unsigned ArgNo = 0; for (Function::const_arg_iterator I = CalledFunc->arg_begin(), E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { Value *ActualArg = *AI; const Argument *Arg = I; // When byval arguments actually inlined, we need to make the copy implied // by them explicit. However, we don't do this if the callee is readonly // or readnone, because the copy would be unneeded: the callee doesn't // modify the struct. if (CS.isByValArgument(ArgNo)) { ActualArg = HandleByValArgument(ActualArg, Arg, TheCall, CalledFunc, IFI, CalledFunc->getParamAlignment(ArgNo+1)); // Calls that we inline may use the new alloca, so we need to clear // their 'tail' flags if HandleByValArgument introduced a new alloca and // the callee has calls. MustClearTailCallFlags |= ActualArg != *AI; } VMap[I] = ActualArg; } // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", &InlinedFunctionInfo, IFI.TD, TheCall); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; // Update the callgraph if requested. if (IFI.CG) UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); // Update inlined instructions' line number information. fixupLineNumbers(Caller, FirstNewBlock, TheCall); } // If there are any alloca instructions in the block that used to be the entry // block for the callee, move them to the entry block of the caller. First // calculate which instruction they should be inserted before. We insert the // instructions at the end of the current alloca list. { BasicBlock::iterator InsertPoint = Caller->begin()->begin(); for (BasicBlock::iterator I = FirstNewBlock->begin(), E = FirstNewBlock->end(); I != E; ) { AllocaInst *AI = dyn_cast<AllocaInst>(I++); if (AI == 0) continue; // If the alloca is now dead, remove it. This often occurs due to code // specialization. if (AI->use_empty()) { AI->eraseFromParent(); continue; } if (!isa<Constant>(AI->getArraySize())) continue; // Keep track of the static allocas that we inline into the caller. IFI.StaticAllocas.push_back(AI); // Scan for the block of allocas that we can move over, and move them // all at once. while (isa<AllocaInst>(I) && isa<Constant>(cast<AllocaInst>(I)->getArraySize())) { IFI.StaticAllocas.push_back(cast<AllocaInst>(I)); ++I; } // Transfer all of the allocas over in a block. Using splice means // that the instructions aren't removed from the symbol table, then // reinserted. Caller->getEntryBlock().getInstList().splice(InsertPoint, FirstNewBlock->getInstList(), AI, I); } } // Leave lifetime markers for the static alloca's, scoping them to the // function we just inlined. if (InsertLifetime && !IFI.StaticAllocas.empty()) { IRBuilder<> builder(FirstNewBlock->begin()); for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { AllocaInst *AI = IFI.StaticAllocas[ai]; // If the alloca is already scoped to something smaller than the whole // function then there's no need to add redundant, less accurate markers. 
if (hasLifetimeMarkers(AI)) continue; // Try to determine the size of the allocation. ConstantInt *AllocaSize = 0; if (ConstantInt *AIArraySize = dyn_cast<ConstantInt>(AI->getArraySize())) { if (IFI.TD) { Type *AllocaType = AI->getAllocatedType(); uint64_t AllocaTypeSize = IFI.TD->getTypeAllocSize(AllocaType); uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); assert(AllocaArraySize > 0 && "array size of AllocaInst is zero"); // Check that array size doesn't saturate uint64_t and doesn't // overflow when it's multiplied by type size. if (AllocaArraySize != ~0ULL && UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), AllocaArraySize * AllocaTypeSize); } } } builder.CreateLifetimeStart(AI, AllocaSize); for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) { IRBuilder<> builder(Returns[ri]); builder.CreateLifetimeEnd(AI, AllocaSize); } } } // If the inlined code contained dynamic alloca instructions, wrap the inlined // code with llvm.stacksave/llvm.stackrestore intrinsics. if (InlinedFunctionInfo.ContainsDynamicAllocas) { Module *M = Caller->getParent(); // Get the two intrinsics we care about. Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); // Insert the llvm.stacksave. CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin()) .CreateCall(StackSave, "savedstack"); // Insert a call to llvm.stackrestore before any return instructions in the // inlined function. for (unsigned i = 0, e = Returns.size(); i != e; ++i) { IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr); } } // If we are inlining tail call instruction through a call site that isn't // marked 'tail', we must remove the tail marker for any calls in the inlined // code. Also, calls inlined through a 'nounwind' call site should be marked // 'nounwind'. if (InlinedFunctionInfo.ContainsCalls && (MustClearTailCallFlags || MarkNoUnwind)) { for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (CallInst *CI = dyn_cast<CallInst>(I)) { if (MustClearTailCallFlags) CI->setTailCall(false); if (MarkNoUnwind) CI->setDoesNotThrow(); } } // If we are inlining for an invoke instruction, we must make sure to rewrite // any call instructions into invoke instructions. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo); // If we cloned in _exactly one_ basic block, and if that block ends in a // return instruction, we splice the body of the inlined callee directly into // the calling basic block. if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { // Move all of the instructions right before the call. OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(), FirstNewBlock->begin(), FirstNewBlock->end()); // Remove the cloned basic block. Caller->getBasicBlockList().pop_back(); // If the call site was an invoke instruction, add a branch to the normal // destination. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall); NewBr->setDebugLoc(Returns[0]->getDebugLoc()); } // If the return instruction returned a value, replace uses of the call with // uses of the returned value. 
if (!TheCall->use_empty()) { ReturnInst *R = Returns[0]; if (TheCall == R->getReturnValue()) TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); else TheCall->replaceAllUsesWith(R->getReturnValue()); } // Since we are now done with the Call/Invoke, we can delete it. TheCall->eraseFromParent(); // Since we are now done with the return instruction, delete it also. Returns[0]->eraseFromParent(); // We are now done with the inlining. return true; } // Otherwise, we have the normal case of more than one block to inline or // multiple return sites. // We want to clone the entire callee function into the hole between the // "starter" and "ender" blocks. How we accomplish this depends on whether // this is an invoke instruction or a call instruction. BasicBlock *AfterCallBB; BranchInst *CreatedBranchToNormalDest = NULL; if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { // Add an unconditional branch to make this look like the CallInst case... CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), TheCall); // Split the basic block. This guarantees that no PHI nodes will have to be // updated due to new incoming edges, and makes the invoke case more // symmetric to the call case. AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest, CalledFunc->getName()+".exit"); } else { // It's a call // If this is a call instruction, we need to split the basic block that // the call lives in. // AfterCallBB = OrigBB->splitBasicBlock(TheCall, CalledFunc->getName()+".exit"); } // Change the branch that used to go to AfterCallBB to branch to the first // basic block of the inlined function. // TerminatorInst *Br = OrigBB->getTerminator(); assert(Br && Br->getOpcode() == Instruction::Br && "splitBasicBlock broken!"); Br->setOperand(0, FirstNewBlock); // Now that the function is correct, make it a little bit nicer. In // particular, move the basic blocks inserted from the end of the function // into the space made by splitting the source basic block. Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(), FirstNewBlock, Caller->end()); // Handle all of the return instructions that we just cloned in, and eliminate // any users of the original call/invoke instruction. Type *RTy = CalledFunc->getReturnType(); PHINode *PHI = 0; if (Returns.size() > 1) { // The PHI node should go at the front of the new basic block to merge all // possible incoming values. if (!TheCall->use_empty()) { PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), AfterCallBB->begin()); // Anything that used the result of the function call should now use the // PHI node as its operand. TheCall->replaceAllUsesWith(PHI); } // Loop over all of the return instructions adding entries to the PHI node // as appropriate. if (PHI) { for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; assert(RI->getReturnValue()->getType() == PHI->getType() && "Ret value not consistent in function!"); PHI->addIncoming(RI->getReturnValue(), RI->getParent()); } } // Add a branch to the merge point and remove return instructions. DebugLoc Loc; for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; BranchInst* BI = BranchInst::Create(AfterCallBB, RI); Loc = RI->getDebugLoc(); BI->setDebugLoc(Loc); RI->eraseFromParent(); } // We need to set the debug location to *somewhere* inside the // inlined function. The line number may be nonsensical, but the // instruction will at least be associated with the right // function.
if (CreatedBranchToNormalDest) CreatedBranchToNormalDest->setDebugLoc(Loc); } else if (!Returns.empty()) { // Otherwise, if there is exactly one return value, just replace anything // using the return value of the call with the computed value. if (!TheCall->use_empty()) { if (TheCall == Returns[0]->getReturnValue()) TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); else TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); } // Update PHI nodes that use the ReturnBB to use the AfterCallBB. BasicBlock *ReturnBB = Returns[0]->getParent(); ReturnBB->replaceAllUsesWith(AfterCallBB); // Splice the code from the return block into the block that it will return // to, which contains the code that was after the call. AfterCallBB->getInstList().splice(AfterCallBB->begin(), ReturnBB->getInstList()); if (CreatedBranchToNormalDest) CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); // Delete the return instruction and the now-empty ReturnBB. Returns[0]->eraseFromParent(); ReturnBB->eraseFromParent(); } else if (!TheCall->use_empty()) { // No returns, but something is using the return value of the call. Just // nuke the result. TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); } // Since we are now done with the Call/Invoke, we can delete it. TheCall->eraseFromParent(); // We should always be able to fold the entry block of the function into the // single predecessor of the block... assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0); // Splice the callee's entry block into the calling block, right before the // unconditional branch. CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes OrigBB->getInstList().splice(Br, CalleeEntry->getInstList()); // Remove the unconditional branch. OrigBB->getInstList().erase(Br); // Now we can remove the CalleeEntry block, which is now empty. Caller->getBasicBlockList().erase(CalleeEntry); // If we inserted a phi node, check to see if it has a single value (e.g. all // the entries are the same or undef). If so, remove the PHI so it doesn't // block other optimizations. if (PHI) { if (Value *V = SimplifyInstruction(PHI, IFI.TD)) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); } } return true; }
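For reference, a minimal sketch of driving this entry point from a transform, assuming the LLVM 3.x-era headers and InlineFunctionInfo API used in the excerpt above; inlineAllCallsTo and its collection strategy are illustrative, not part of the source:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Transforms/Utils/Cloning.h"

using namespace llvm;

// Inline every direct call to Callee, returning how many sites were inlined.
static unsigned inlineAllCallsTo(Function &Callee) {
  InlineFunctionInfo IFI; // No call graph or target data updates requested.
  // Collect the call sites first: InlineFunction mutates the use list.
  SmallVector<CallSite, 8> Sites;
  for (Value::use_iterator UI = Callee.use_begin(), E = Callee.use_end();
       UI != E; ++UI) {
    CallSite CS(*UI);
    if (CS.getInstruction() && CS.getCalledFunction() == &Callee)
      Sites.push_back(CS);
  }
  unsigned NumInlined = 0;
  for (unsigned i = 0, e = Sites.size(); i != e; ++i)
    if (InlineFunction(Sites[i], IFI, /*InsertLifetime=*/true))
      ++NumInlined;
  return NumInlined;
}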
/// InsertStackProtectors - Insert code into the prologue and epilogue of the /// function. /// /// - The prologue code loads and stores the stack guard onto the stack. /// - The epilogue checks the value stored in the prologue against the original /// value. It calls __stack_chk_fail if they differ. bool StackProtector::InsertStackProtectors() { bool HasPrologue = false; bool SupportsSelectionDAGSP = EnableSelectionDAGSP && !TM->Options.EnableFastISel; AllocaInst *AI = nullptr; // Place on stack that stores the stack guard. Value *StackGuardVar = nullptr; // The stack guard variable. for (Function::iterator I = F->begin(), E = F->end(); I != E;) { BasicBlock *BB = &*I++; ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); if (!RI) continue; if (!HasPrologue) { HasPrologue = true; SupportsSelectionDAGSP &= CreatePrologue(F, M, RI, TLI, Trip, AI, StackGuardVar); } if (SupportsSelectionDAGSP) { // Since we have a potential tail call, insert the special stack check // intrinsic. Instruction *InsertionPt = nullptr; if (CallInst *CI = FindPotentialTailCall(BB, RI, TLI)) { InsertionPt = CI; } else { InsertionPt = RI; // At this point we know that BB has a return statement so it *DOES* // have a terminator. assert(InsertionPt != nullptr && "BB must have a terminator instruction at this point."); } Function *Intrinsic = Intrinsic::getDeclaration(M, Intrinsic::stackprotectorcheck); CallInst::Create(Intrinsic, StackGuardVar, "", InsertionPt); } else { // If we do not support SelectionDAG based tail calls, generate IR level // tail calls. // // For each block with a return instruction, convert this: // // return: // ... // ret ... // // into this: // // return: // ... // %1 = load __stack_chk_guard // %2 = load StackGuardSlot // %3 = cmp i1 %1, %2 // br i1 %3, label %SP_return, label %CallStackCheckFailBlk // // SP_return: // ret ... // // CallStackCheckFailBlk: // call void @__stack_chk_fail() // unreachable // Create the FailBB. We duplicate the BB every time since the MI tail // merge pass will merge together all of the various BB into one including // fail BB generated by the stack protector pseudo instruction. BasicBlock *FailBB = CreateFailBB(); // Split the basic block before the return instruction. BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return"); // Update the dominator tree if we need to. if (DT && DT->isReachableFromEntry(BB)) { DT->addNewBlock(NewBB, BB); DT->addNewBlock(FailBB, BB); } // Remove default branch instruction to the new BB. BB->getTerminator()->eraseFromParent(); // Move the newly created basic block to the point right after the old // basic block so that it's in the "fall through" position. NewBB->moveAfter(BB); // Generate the stack protector instructions in the old basic block. IRBuilder<> B(BB); LoadInst *LI1 = B.CreateLoad(StackGuardVar); LoadInst *LI2 = B.CreateLoad(AI); Value *Cmp = B.CreateICmpEQ(LI1, LI2); unsigned SuccessWeight = BranchProbabilityInfo::getBranchWeightStackProtector(true); unsigned FailureWeight = BranchProbabilityInfo::getBranchWeightStackProtector(false); MDNode *Weights = MDBuilder(F->getContext()) .createBranchWeights(SuccessWeight, FailureWeight); B.CreateCondBr(Cmp, NewBB, FailBB, Weights); } } // Return if we didn't modify any basic blocks. i.e., there are no return // statements in the function. return HasPrologue; }
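The IR sequence sketched in the comment block above takes only a few IRBuilder calls to produce. A hedged sketch of that epilogue check in isolation, assuming the pre-opaque-pointer CreateLoad overloads the excerpt itself uses; GuardVar, GuardSlot, RetBB, and FailBB stand in for the pass's own values:

#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Load both guard values, compare, and branch to the return block on a match
// or to the failure block (which calls __stack_chk_fail) on a mismatch.
// Assumes BB's original terminator has already been removed.
static void emitGuardCheck(BasicBlock *BB, Value *GuardVar, Value *GuardSlot,
                           BasicBlock *RetBB, BasicBlock *FailBB) {
  IRBuilder<> B(BB);
  Value *Original = B.CreateLoad(GuardVar, "guard");
  Value *OnStack = B.CreateLoad(GuardSlot, "guard.slot");
  Value *OK = B.CreateICmpEQ(Original, OnStack, "guard.ok");
  B.CreateCondBr(OK, RetBB, FailBB);
}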
// // Method: runOnModule() // // Description: // Entry point for this LLVM pass. // If a function returns a struct, make it return // a pointer to the struct. // // Inputs: // M - A reference to the LLVM module to transform // // Outputs: // M - The transformed LLVM module. // // Return value: // true - The module was modified. // false - The module was not modified. // bool StructRet::runOnModule(Module& M) { const llvm::DataLayout targetData(&M); std::vector<Function*> worklist; for (Module::iterator I = M.begin(); I != M.end(); ++I) if (!I->mayBeOverridden()) { if(I->hasAddressTaken()) continue; if(I->getReturnType()->isStructTy()) { worklist.push_back(I); } } while(!worklist.empty()) { Function *F = worklist.back(); worklist.pop_back(); Type *NewArgType = F->getReturnType()->getPointerTo(); // Construct the new Type std::vector<Type*>TP; TP.push_back(NewArgType); for (Function::arg_iterator ii = F->arg_begin(), ee = F->arg_end(); ii != ee; ++ii) { TP.push_back(ii->getType()); } FunctionType *NFTy = FunctionType::get(F->getReturnType(), TP, F->isVarArg()); // Create the new function body and insert it into the module. Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName(), &M); ValueToValueMapTy ValueMap; Function::arg_iterator NI = NF->arg_begin(); NI->setName("ret"); ++NI; for (Function::arg_iterator II = F->arg_begin(); II != F->arg_end(); ++II, ++NI) { ValueMap[II] = NI; NI->setName(II->getName()); AttributeSet attrs = F->getAttributes().getParamAttributes(II->getArgNo() + 1); if (!attrs.isEmpty()) NI->addAttr(attrs); } // Perform the cloning. SmallVector<ReturnInst*,100> Returns; if (!F->isDeclaration()) CloneFunctionInto(NF, F, ValueMap, false, Returns); std::vector<Value*> fargs; for(Function::arg_iterator ai = NF->arg_begin(), ae= NF->arg_end(); ai != ae; ++ai) { fargs.push_back(ai); } NF->setAttributes(NF->getAttributes().addAttributes( M.getContext(), 0, F->getAttributes().getRetAttributes())); NF->setAttributes(NF->getAttributes().addAttributes( M.getContext(), ~0, F->getAttributes().getFnAttributes())); for (Function::iterator B = NF->begin(), FE = NF->end(); B != FE; ++B) { for (BasicBlock::iterator I = B->begin(), BE = B->end(); I != BE;) { ReturnInst * RI = dyn_cast<ReturnInst>(I++); if(!RI) continue; LoadInst *LI = dyn_cast<LoadInst>(RI->getOperand(0)); assert(LI && "Return should be preceded by a load instruction"); IRBuilder<> Builder(RI); Builder.CreateMemCpy(fargs.at(0), LI->getPointerOperand(), targetData.getTypeStoreSize(LI->getType()), targetData.getPrefTypeAlignment(LI->getType())); } } for(Value::use_iterator ui = F->use_begin(), ue = F->use_end(); ui != ue; ) { CallInst *CI = dyn_cast<CallInst>(*ui++); if(!CI) continue; if(CI->getCalledFunction() != F) continue; if(CI->hasByValArgument()) continue; AllocaInst *AllocaNew = new AllocaInst(F->getReturnType(), 0, "", CI); SmallVector<Value*, 8> Args; //this should probably be done in a different manner AttributeSet NewCallPAL=AttributeSet(); // Get the initial attributes of the call AttributeSet CallPAL = CI->getAttributes(); AttributeSet RAttrs = CallPAL.getRetAttributes(); AttributeSet FnAttrs = CallPAL.getFnAttributes(); if (!RAttrs.isEmpty()) NewCallPAL=NewCallPAL.addAttributes(F->getContext(),0, RAttrs); Args.push_back(AllocaNew); for(unsigned j = 0; j < CI->getNumOperands()-1; j++) { Args.push_back(CI->getOperand(j)); // position in the NewCallPAL AttributeSet Attrs = CallPAL.getParamAttributes(j); if (!Attrs.isEmpty()) NewCallPAL=NewCallPAL.addAttributes(F->getContext(),Args.size(), Attrs); } 
// Create the new attributes vec. if (!FnAttrs.isEmpty()) NewCallPAL=NewCallPAL.addAttributes(F->getContext(),~0, FnAttrs); CallInst *CallI = CallInst::Create(NF, Args, "", CI); CallI->setCallingConv(CI->getCallingConv()); CallI->setAttributes(NewCallPAL); LoadInst *LI = new LoadInst(AllocaNew, "", CI); CI->replaceAllUsesWith(LI); CI->eraseFromParent(); } if(F->use_empty()) F->eraseFromParent(); } return true; }
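At each rewritten call site the pass produces one fixed shape: allocate the return slot, pass it as the new leading argument, and load the struct back out for the old users. A minimal sketch of that shape under the same typed-pointer-era assumptions, with NF standing for the rewritten function and the attribute bookkeeping omitted:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Returns the struct value that should replace the old call's uses.
static Value *rewriteStructRetCallSite(CallInst *OldCall, Function *NF) {
  IRBuilder<> B(OldCall);
  AllocaInst *Slot = B.CreateAlloca(OldCall->getType(), nullptr, "sret.slot");
  SmallVector<Value *, 8> Args;
  Args.push_back(Slot); // New leading out-pointer argument.
  for (unsigned i = 0, e = OldCall->getNumArgOperands(); i != e; ++i)
    Args.push_back(OldCall->getArgOperand(i)); // Original arguments follow.
  B.CreateCall(NF, Args);
  return B.CreateLoad(Slot, "sret.val"); // Struct value for the old users.
}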
void AAAnalyzer::handle_inst(Instruction *inst, FunctionWrapper * parent_func) { //outs()<<*inst<<"\n"; outs().flush(); switch (inst->getOpcode()) { // common/bitwise binary operations // Terminator instructions case Instruction::Ret: { ReturnInst* retInst = ((ReturnInst*) inst); if (retInst->getNumOperands() > 0 && !retInst->getOperandUse(0)->getType()->isVoidTy()) { parent_func->addRet(retInst->getOperandUse(0)); } } break; case Instruction::Resume: { Value* resume = ((ResumeInst*) inst)->getOperand(0); parent_func->addResume(resume); } break; case Instruction::Switch: case Instruction::Br: case Instruction::IndirectBr: case Instruction::Unreachable: break; // vector operations case Instruction::ExtractElement: { } break; case Instruction::InsertElement: { } break; case Instruction::ShuffleVector: { } break; // aggregate operations case Instruction::ExtractValue: { Value * agg = ((ExtractValueInst*) inst)->getAggregateOperand(); DyckVertex* aggV = wrapValue(agg); Type* aggTy = agg->getType(); ArrayRef<unsigned> indices = ((ExtractValueInst*) inst)->getIndices(); DyckVertex* currentStruct = aggV; for (unsigned int i = 0; i < indices.size(); i++) { if (isa<CompositeType>(aggTy) && aggTy->isSized()) { if (!aggTy->isStructTy()) { aggTy = ((CompositeType*) aggTy)->getTypeAtIndex(indices[i]); #ifndef ARRAY_SIMPLIFIED current = addPtrOffset(current, (int) indices[i] * dl.getTypeAllocSize(aggTy), dgraph); #endif if (i == indices.size() - 1) { this->makeAlias(currentStruct, wrapValue(inst)); } } else { aggTy = ((CompositeType*) aggTy)->getTypeAtIndex(indices[i]); if (i != indices.size() - 1) { currentStruct = this->addField(currentStruct, -2 - (int) indices[i], NULL); } else { currentStruct = this->addField(currentStruct, -2 - (int) indices[i], wrapValue(inst)); } } } else { break; } } } break; case Instruction::InsertValue: { DyckVertex* resultV = wrapValue(inst); Value * agg = ((InsertValueInst*) inst)->getAggregateOperand(); if (!isa<UndefValue>(agg)) { makeAlias(resultV, wrapValue(agg)); } Value * val = ((InsertValueInst*) inst)->getInsertedValueOperand(); DyckVertex* insertedVal = wrapValue(val); Type *aggTy = inst->getType(); ArrayRef<unsigned> indices = ((InsertValueInst*) inst)->getIndices(); DyckVertex* currentStruct = resultV; for (unsigned int i = 0; i < indices.size(); i++) { if (isa<CompositeType>(aggTy) && aggTy->isSized()) { if (!aggTy->isStructTy()) { aggTy = ((CompositeType*) aggTy)->getTypeAtIndex(indices[i]); #ifndef ARRAY_SIMPLIFIED current = addPtrOffset(current, (int) indices[i] * dl.getTypeAllocSize(aggTy), dgraph); #endif if (i == indices.size() - 1) { this->makeAlias(currentStruct, insertedVal); } } else { aggTy = ((CompositeType*) aggTy)->getTypeAtIndex(indices[i]); if (i != indices.size() - 1) { currentStruct = this->addField(currentStruct, -2 - (int) indices[i], NULL); } else { currentStruct = this->addField(currentStruct, -2 - (int) indices[i], insertedVal); } } } else { break; } } } break; // memory accessing and addressing operations case Instruction::Alloca: { } break; case Instruction::Fence: { } break; case Instruction::AtomicCmpXchg: { Value * retXchg = inst; Value * ptrXchg = inst->getOperand(0); Value * newXchg = inst->getOperand(2); addPtrTo(wrapValue(ptrXchg), wrapValue(retXchg)); addPtrTo(wrapValue(ptrXchg), wrapValue(newXchg)); } break; case Instruction::AtomicRMW: { Value * retRmw = inst; Value * ptrRmw = ((AtomicRMWInst*) inst)->getPointerOperand(); addPtrTo(wrapValue(ptrRmw), wrapValue(retRmw)); switch (((AtomicRMWInst*) inst)->getOperation()) { case 
AtomicRMWInst::Max: case AtomicRMWInst::Min: case AtomicRMWInst::UMax: case AtomicRMWInst::UMin: case AtomicRMWInst::Xchg: { Value * newRmw = ((AtomicRMWInst*) inst)->getValOperand(); addPtrTo(wrapValue(ptrRmw), wrapValue(newRmw)); } break; default: //others are binary ops like add/sub/... ///@TODO break; } } break; case Instruction::Load: { Value *lval = inst; Value *ladd = inst->getOperand(0); addPtrTo(wrapValue(ladd), wrapValue(lval)); } break; case Instruction::Store: { Value * sval = inst->getOperand(0); Value * sadd = inst->getOperand(1); addPtrTo(wrapValue(sadd), wrapValue(sval)); } break; case Instruction::GetElementPtr: { makeAlias(wrapValue(inst), handle_gep((GEPOperator*) inst)); } break; // conversion operations case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::UIToFP: case Instruction::SIToFP: case Instruction::BitCast: case Instruction::PtrToInt: case Instruction::IntToPtr: { Value * itpv = inst->getOperand(0); makeAlias(wrapValue(inst), wrapValue(itpv)); } break; // other operations case Instruction::Invoke: // invoke is a terminal operation { InvokeInst * invoke = (InvokeInst*) inst; LandingPadInst* lpd = invoke->getLandingPadInst(); parent_func->addLandingPad(invoke, lpd); Value * cv = invoke->getCalledValue(); vector<Value*> args; for (unsigned i = 0; i < invoke->getNumArgOperands(); i++) { args.push_back(invoke->getArgOperand(i)); } this->handle_invoke_call_inst(invoke, cv, &args, parent_func); } break; case Instruction::Call: { CallInst * callinst = (CallInst*) inst; if (callinst->isInlineAsm()) { break; } Value * cv = callinst->getCalledValue(); vector<Value*> args; for (unsigned i = 0; i < callinst->getNumArgOperands(); i++) { args.push_back(callinst->getArgOperand(i)); } this->handle_invoke_call_inst(callinst, cv, &args, parent_func); } break; case Instruction::PHI: { PHINode *phi = (PHINode *) inst; int nums = phi->getNumIncomingValues(); for (int i = 0; i < nums; i++) { Value * p = phi->getIncomingValue(i); makeAlias(wrapValue(inst), wrapValue(p)); } } break; case Instruction::Select: { Value *first = ((SelectInst*) inst)->getTrueValue(); Value *second = ((SelectInst*) inst)->getFalseValue(); makeAlias(wrapValue(inst), wrapValue(first)); makeAlias(wrapValue(inst), wrapValue(second)); } break; case Instruction::VAArg: { parent_func->addVAArg(inst); DyckVertex* vaarg = wrapValue(inst); Value * ptrVaarg = inst->getOperand(0); addPtrTo(wrapValue(ptrVaarg), vaarg); } break; case Instruction::LandingPad: // handled with invoke inst case Instruction::ICmp: case Instruction::FCmp: default: break; } }
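The hand-written opcode switch above is the main alternative to llvm::InstVisitor, which generates the same dispatch from overridden callbacks. A sketch of the visitor form; the callback bodies are placeholders for the analyzer's own edge-building helpers:

#include "llvm/IR/InstVisitor.h"

using namespace llvm;

struct PointsToVisitor : InstVisitor<PointsToVisitor> {
  void visitLoadInst(LoadInst &LI) { /* addPtrTo(address, loaded value) */ }
  void visitStoreInst(StoreInst &SI) { /* addPtrTo(address, stored value) */ }
  void visitPHINode(PHINode &PN) { /* alias result with each incoming value */ }
  void visitReturnInst(ReturnInst &RI) { /* record the returned value, if any */ }
  // Opcodes without a dedicated callback fall through to this default.
  void visitInstruction(Instruction &I) {}
};
// Usage: PointsToVisitor().visit(F); visits every instruction in F.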
// visitReturnInst - We want to destroy the setjmp map upon exit from the // function. void LowerSetJmp::visitReturnInst(ReturnInst &RI) { Function* Func = RI.getParent()->getParent(); CallInst::Create(DestroySJMap, GetSetJmpMap(Func), "", &RI); }
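This visitor leans on the overload the call above uses: CallInst::Create with a trailing insert-before instruction splices the new call immediately ahead of the ret, so the cleanup runs on every exit. A reduced sketch of the idiom (modern LLVM would pass the arguments as an ArrayRef instead):

#include "llvm/IR/Instructions.h"

using namespace llvm;

static void insertCleanupBeforeRet(ReturnInst &RI, Value *CleanupFn,
                                   Value *MapArg) {
  // The final argument is the insertion point: the call executes before RI.
  CallInst::Create(CleanupFn, MapArg, "", &RI);
}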
void Lint::visitReturnInst(ReturnInst &I) { Function *F = I.getParent()->getParent(); Assert1(!F->doesNotReturn(), "Unusual: Return statement in function with noreturn attribute", &I); }
/// InsertStackProtectors - Insert code into the prologue and epilogue of the /// function. /// /// - The prologue code loads and stores the stack guard onto the stack. /// - The epilogue checks the value stored in the prologue against the original /// value. It calls __stack_chk_fail if they differ. bool StackProtector::InsertStackProtectors() { bool HasPrologue = false; bool SupportsSelectionDAGSP = EnableSelectionDAGSP && !TM->Options.EnableFastISel; AllocaInst *AI = nullptr; // Place on stack that stores the stack guard. Value *StackGuardVar = nullptr; // The stack guard variable. for (Function::iterator I = F->begin(), E = F->end(); I != E;) { BasicBlock *BB = &*I++; ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); if (!RI) continue; if (!HasPrologue) { HasPrologue = true; SupportsSelectionDAGSP &= CreatePrologue(F, M, RI, TLI, AI, StackGuardVar); } if (!SupportsSelectionDAGSP) { // If we do not support SelectionDAG based tail calls, generate IR level // tail calls. // // For each block with a return instruction, convert this: // // return: // ... // ret ... // // into this: // // return: // ... // %1 = load __stack_chk_guard // %2 = load StackGuardSlot // %3 = cmp i1 %1, %2 // br i1 %3, label %SP_return, label %CallStackCheckFailBlk // // SP_return: // ret ... // // CallStackCheckFailBlk: // call void @__stack_chk_fail() // unreachable // Create the FailBB. We duplicate the BB every time since the MI tail // merge pass will merge together all of the various BB into one including // fail BB generated by the stack protector pseudo instruction. BasicBlock *FailBB = CreateFailBB(); // Set HasIRCheck to true, so that SelectionDAG will not generate its own // version. HasIRCheck = true; // Split the basic block before the return instruction. BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return"); // Update the dominator tree if we need to. if (DT && DT->isReachableFromEntry(BB)) { DT->addNewBlock(NewBB, BB); DT->addNewBlock(FailBB, BB); } // Remove default branch instruction to the new BB. BB->getTerminator()->eraseFromParent(); // Move the newly created basic block to the point right after the old // basic block so that it's in the "fall through" position. NewBB->moveAfter(BB); // Generate the stack protector instructions in the old basic block. IRBuilder<> B(BB); LoadInst *LI1 = B.CreateLoad(StackGuardVar); LoadInst *LI2 = B.CreateLoad(AI); Value *Cmp = B.CreateICmpEQ(LI1, LI2); auto SuccessProb = BranchProbabilityInfo::getBranchProbStackProtector(true); auto FailureProb = BranchProbabilityInfo::getBranchProbStackProtector(false); MDNode *Weights = MDBuilder(F->getContext()) .createBranchWeights(SuccessProb.getNumerator(), FailureProb.getNumerator()); B.CreateCondBr(Cmp, NewBB, FailBB, Weights); } } // Return if we didn't modify any basic blocks. i.e., there are no return // statements in the function. return HasPrologue; }
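Both versions of the epilogue bias the guard branch with !prof branch-weight metadata so the success path is treated as overwhelmingly likely. A hedged sketch of just that step, using illustrative weights rather than BranchProbabilityInfo's actual constants:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"

using namespace llvm;

static BranchInst *createWeightedGuardBranch(IRBuilder<> &B, Value *Cmp,
                                             BasicBlock *RetBB,
                                             BasicBlock *FailBB,
                                             LLVMContext &Ctx) {
  // Heavily favor the success edge; the exact ratio here is illustrative.
  MDNode *Weights =
      MDBuilder(Ctx).createBranchWeights(/*TrueWeight=*/1u << 20,
                                         /*FalseWeight=*/1);
  return B.CreateCondBr(Cmp, RetBB, FailBB, Weights);
}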
// InlineFunction - This function inlines the called function into the basic // block of the caller. This returns false if it is not possible to inline this // call. The program is still in a well defined state if this occurs though. // // Note that this only does one level of inlining. For example, if the // instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now // exists in the instruction stream. Similarly this will inline a recursive // function by one level. // bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD) { Instruction *TheCall = CS.getInstruction(); assert(TheCall->getParent() && TheCall->getParent()->getParent() && "Instruction not in function!"); const Function *CalledFunc = CS.getCalledFunction(); if (CalledFunc == 0 || // Can't inline external function or indirect CalledFunc->isDeclaration() || // call, or call to a vararg function! CalledFunc->getFunctionType()->isVarArg()) return false; // If the call to the callee is a non-tail call, we must clear the 'tail' // flags on any calls that we inline. bool MustClearTailCallFlags = isa<CallInst>(TheCall) && !cast<CallInst>(TheCall)->isTailCall(); BasicBlock *OrigBB = TheCall->getParent(); Function *Caller = OrigBB->getParent(); // Get an iterator to the last basic block in the function, which will have // the new function inlined after it. // Function::iterator LastBlock = &Caller->back(); // Make sure to capture all of the return instructions from the cloned // function. std::vector<ReturnInst*> Returns; ClonedCodeInfo InlinedFunctionInfo; Function::iterator FirstNewBlock; { // Scope to destroy ValueMap after cloning. DenseMap<const Value*, Value*> ValueMap; // Calculate the vector of arguments to pass into the function cloner, which // matches up the formal to the actual argument values. assert(std::distance(CalledFunc->arg_begin(), CalledFunc->arg_end()) == std::distance(CS.arg_begin(), CS.arg_end()) && "No varargs calls can be inlined!"); CallSite::arg_iterator AI = CS.arg_begin(); for (Function::const_arg_iterator I = CalledFunc->arg_begin(), E = CalledFunc->arg_end(); I != E; ++I, ++AI) ValueMap[I] = *AI; // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i", &InlinedFunctionInfo, TD); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; // Update the callgraph if requested. if (CG) UpdateCallGraphAfterInlining(Caller, CalledFunc, FirstNewBlock, ValueMap, *CG); } // If there are any alloca instructions in the block that used to be the entry // block for the callee, move them to the entry block of the caller. First // calculate which instruction they should be inserted before. We insert the // instructions at the end of the current alloca list. // { BasicBlock::iterator InsertPoint = Caller->begin()->begin(); for (BasicBlock::iterator I = FirstNewBlock->begin(), E = FirstNewBlock->end(); I != E; ) if (AllocaInst *AI = dyn_cast<AllocaInst>(I++)) { // If the alloca is now dead, remove it. This often occurs due to code // specialization. if (AI->use_empty()) { AI->eraseFromParent(); continue; } if (isa<Constant>(AI->getArraySize())) { // Scan for the block of allocas that we can move over, and move them // all at once.
while (isa<AllocaInst>(I) && isa<Constant>(cast<AllocaInst>(I)->getArraySize())) ++I; // Transfer all of the allocas over in a block. Using splice means // that the instructions aren't removed from the symbol table, then // reinserted. Caller->getEntryBlock().getInstList().splice( InsertPoint, FirstNewBlock->getInstList(), AI, I); } } } // If the inlined code contained dynamic alloca instructions, wrap the inlined // code with llvm.stacksave/llvm.stackrestore intrinsics. if (InlinedFunctionInfo.ContainsDynamicAllocas) { Module *M = Caller->getParent(); const Type *BytePtr = PointerType::get(Type::Int8Ty); // Get the two intrinsics we care about. Constant *StackSave, *StackRestore; StackSave = M->getOrInsertFunction("llvm.stacksave", BytePtr, NULL); StackRestore = M->getOrInsertFunction("llvm.stackrestore", Type::VoidTy, BytePtr, NULL); // If we are preserving the callgraph, add edges to the stacksave/restore // functions for the calls we insert. CallGraphNode *StackSaveCGN = 0, *StackRestoreCGN = 0, *CallerNode = 0; if (CG) { // We know that StackSave/StackRestore are Function*'s, because they are // intrinsics which must have the right types. StackSaveCGN = CG->getOrInsertFunction(cast<Function>(StackSave)); StackRestoreCGN = CG->getOrInsertFunction(cast<Function>(StackRestore)); CallerNode = (*CG)[Caller]; } // Insert the llvm.stacksave. CallInst *SavedPtr = new CallInst(StackSave, "savedstack", FirstNewBlock->begin()); if (CG) CallerNode->addCalledFunction(SavedPtr, StackSaveCGN); // Insert a call to llvm.stackrestore before any return instructions in the // inlined function. for (unsigned i = 0, e = Returns.size(); i != e; ++i) { CallInst *CI = new CallInst(StackRestore, SavedPtr, "", Returns[i]); if (CG) CallerNode->addCalledFunction(CI, StackRestoreCGN); } // Count the number of StackRestore calls we insert. unsigned NumStackRestores = Returns.size(); // If we are inlining an invoke instruction, insert restores before each // unwind. These unwinds will be rewritten into branches later. if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) { for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) { new CallInst(StackRestore, SavedPtr, "", UI); ++NumStackRestores; } } } // If we are inlining tail call instruction through a call site that isn't // marked 'tail', we must remove the tail marker for any calls in the inlined // code. if (MustClearTailCallFlags && InlinedFunctionInfo.ContainsCalls) { for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) if (CallInst *CI = dyn_cast<CallInst>(I)) CI->setTailCall(false); } // If we are inlining for an invoke instruction, we must make sure to rewrite // any inlined 'unwind' instructions into branches to the invoke exception // destination, and call instructions into invoke instructions. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo); // If we cloned in _exactly one_ basic block, and if that block ends in a // return instruction, we splice the body of the inlined callee directly into // the calling basic block. if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { // Move all of the instructions right before the call. OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(), FirstNewBlock->begin(), FirstNewBlock->end()); // Remove the cloned basic block. 
Caller->getBasicBlockList().pop_back(); // If the call site was an invoke instruction, add a branch to the normal // destination. if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) new BranchInst(II->getNormalDest(), TheCall); // If the return instruction returned a value, replace uses of the call with // uses of the returned value. if (!TheCall->use_empty()) TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); // Since we are now done with the Call/Invoke, we can delete it. TheCall->getParent()->getInstList().erase(TheCall); // Since we are now done with the return instruction, delete it also. Returns[0]->getParent()->getInstList().erase(Returns[0]); // We are now done with the inlining. return true; } // Otherwise, we have the normal case of more than one block to inline or // multiple return sites. // We want to clone the entire callee function into the hole between the // "starter" and "ender" blocks. How we accomplish this depends on whether // this is an invoke instruction or a call instruction. BasicBlock *AfterCallBB; if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) { // Add an unconditional branch to make this look like the CallInst case... BranchInst *NewBr = new BranchInst(II->getNormalDest(), TheCall); // Split the basic block. This guarantees that no PHI nodes will have to be // updated due to new incoming edges, and makes the invoke case more // symmetric to the call case. AfterCallBB = OrigBB->splitBasicBlock(NewBr, CalledFunc->getName()+".exit"); } else { // It's a call // If this is a call instruction, we need to split the basic block that // the call lives in. // AfterCallBB = OrigBB->splitBasicBlock(TheCall, CalledFunc->getName()+".exit"); } // Change the branch that used to go to AfterCallBB to branch to the first // basic block of the inlined function. // TerminatorInst *Br = OrigBB->getTerminator(); assert(Br && Br->getOpcode() == Instruction::Br && "splitBasicBlock broken!"); Br->setOperand(0, FirstNewBlock); // Now that the function is correct, make it a little bit nicer. In // particular, move the basic blocks inserted from the end of the function // into the space made by splitting the source basic block. // Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(), FirstNewBlock, Caller->end()); // Handle all of the return instructions that we just cloned in, and eliminate // any users of the original call/invoke instruction. if (Returns.size() > 1) { // The PHI node should go at the front of the new basic block to merge all // possible incoming values. // PHINode *PHI = 0; if (!TheCall->use_empty()) { PHI = new PHINode(CalledFunc->getReturnType(), TheCall->getName(), AfterCallBB->begin()); // Anything that used the result of the function call should now use the // PHI node as its operand. // TheCall->replaceAllUsesWith(PHI); } // Loop over all of the return instructions, turning them into unconditional // branches to the merge point now, and adding entries to the PHI node as // appropriate. for (unsigned i = 0, e = Returns.size(); i != e; ++i) { ReturnInst *RI = Returns[i]; if (PHI) { assert(RI->getReturnValue() && "Ret should have value!"); assert(RI->getReturnValue()->getType() == PHI->getType() && "Ret value not consistent in function!"); PHI->addIncoming(RI->getReturnValue(), RI->getParent()); } // Add a branch to the merge point where the PHI node lives if it exists.
new BranchInst(AfterCallBB, RI); // Delete the return instruction now. RI->getParent()->getInstList().erase(RI); } } else if (!Returns.empty()) { // Otherwise, if there is exactly one return value, just replace anything // using the return value of the call with the computed value. if (!TheCall->use_empty()) TheCall->replaceAllUsesWith(Returns[0]->getReturnValue()); // Splice the code from the return block into the block that it will return // to, which contains the code that was after the call. BasicBlock *ReturnBB = Returns[0]->getParent(); AfterCallBB->getInstList().splice(AfterCallBB->begin(), ReturnBB->getInstList()); // Update PHI nodes that use the ReturnBB to use the AfterCallBB. ReturnBB->replaceAllUsesWith(AfterCallBB); // Delete the return instruction and the now-empty ReturnBB. Returns[0]->eraseFromParent(); ReturnBB->eraseFromParent(); } else if (!TheCall->use_empty()) { // No returns, but something is using the return value of the call. Just // nuke the result. TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType())); } // Since we are now done with the Call/Invoke, we can delete it. TheCall->eraseFromParent(); // We should always be able to fold the entry block of the function into the // single predecessor of the block... assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!"); BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0); // Splice the callee's entry block into the calling block, right before the // unconditional branch. OrigBB->getInstList().splice(Br, CalleeEntry->getInstList()); CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes // Remove the unconditional branch. OrigBB->getInstList().erase(Br); // Now we can remove the CalleeEntry block, which is now empty. Caller->getBasicBlockList().erase(CalleeEntry); return true; }
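Stripped of the inliner's bookkeeping, the multi-return merge in both versions of InlineFunction reduces to one pattern: each cloned ret becomes a branch to a merge block whose PHI reconstitutes the return value. A sketch using the modern PHINode::Create signature, assuming every ret returns a value:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static PHINode *mergeReturns(ArrayRef<ReturnInst *> Returns,
                             BasicBlock *MergeBB, Type *RetTy) {
  PHINode *PHI = PHINode::Create(RetTy, Returns.size(), "ret.merge",
                                 &MergeBB->front());
  for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
    ReturnInst *RI = Returns[i];
    PHI->addIncoming(RI->getReturnValue(), RI->getParent());
    BranchInst::Create(MergeBB, RI); // Branch to the merge point...
    RI->eraseFromParent();           // ...and drop the original ret.
  }
  return PHI;
}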
bool AMDGPURewriteOutArguments::runOnFunction(Function &F) { if (skipFunction(F)) return false; // TODO: Could probably handle variadic functions. if (F.isVarArg() || F.hasStructRetAttr() || AMDGPU::isEntryFunctionCC(F.getCallingConv())) return false; MDA = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); unsigned ReturnNumRegs = 0; SmallSet<int, 4> OutArgIndexes; SmallVector<Type *, 4> ReturnTypes; Type *RetTy = F.getReturnType(); if (!RetTy->isVoidTy()) { ReturnNumRegs = DL->getTypeStoreSize(RetTy) / 4; if (ReturnNumRegs >= MaxNumRetRegs) return false; ReturnTypes.push_back(RetTy); } SmallVector<Argument *, 4> OutArgs; for (Argument &Arg : F.args()) { if (isOutArgumentCandidate(Arg)) { LLVM_DEBUG(dbgs() << "Found possible out argument " << Arg << " in function " << F.getName() << '\n'); OutArgs.push_back(&Arg); } } if (OutArgs.empty()) return false; using ReplacementVec = SmallVector<std::pair<Argument *, Value *>, 4>; DenseMap<ReturnInst *, ReplacementVec> Replacements; SmallVector<ReturnInst *, 4> Returns; for (BasicBlock &BB : F) { if (ReturnInst *RI = dyn_cast<ReturnInst>(&BB.back())) Returns.push_back(RI); } if (Returns.empty()) return false; bool Changing; do { Changing = false; // Keep retrying if we are able to successfully eliminate an argument. This // helps with cases with multiple arguments which may alias, such as in a // sincos implementation. If we have 2 stores to arguments, on the first // attempt the MDA query will succeed for the second store but not the // first. On the second iteration we've removed that out clobbering argument // (by effectively moving it into another function) and will find the second // argument is OK to move. for (Argument *OutArg : OutArgs) { bool ThisReplaceable = true; SmallVector<std::pair<ReturnInst *, StoreInst *>, 4> ReplaceableStores; Type *ArgTy = OutArg->getType()->getPointerElementType(); // Skip this argument if converting it will push us over the register // count to return limit. // TODO: This is an approximation. When legalized this could be more. We // can ask TLI for exactly how many. unsigned ArgNumRegs = DL->getTypeStoreSize(ArgTy) / 4; if (ArgNumRegs + ReturnNumRegs > MaxNumRetRegs) continue; // An argument is convertible only if all exit blocks are able to replace // it. for (ReturnInst *RI : Returns) { BasicBlock *BB = RI->getParent(); MemDepResult Q = MDA->getPointerDependencyFrom(MemoryLocation(OutArg), true, BB->end(), BB, RI); StoreInst *SI = nullptr; if (Q.isDef()) SI = dyn_cast<StoreInst>(Q.getInst()); if (SI) { LLVM_DEBUG(dbgs() << "Found out argument store: " << *SI << '\n'); ReplaceableStores.emplace_back(RI, SI); } else { ThisReplaceable = false; break; } } if (!ThisReplaceable) continue; // Try the next argument candidate. for (std::pair<ReturnInst *, StoreInst *> Store : ReplaceableStores) { Value *ReplVal = Store.second->getValueOperand(); auto &ValVec = Replacements[Store.first]; if (llvm::find_if(ValVec, [OutArg](const std::pair<Argument *, Value *> &Entry) { return Entry.first == OutArg;}) != ValVec.end()) { LLVM_DEBUG(dbgs() << "Saw multiple out arg stores " << *OutArg << '\n'); // It is possible to see stores to the same argument multiple times, // but we expect these would have been optimized out already.
ThisReplaceable = false; break; } ValVec.emplace_back(OutArg, ReplVal); Store.second->eraseFromParent(); } if (ThisReplaceable) { ReturnTypes.push_back(ArgTy); OutArgIndexes.insert(OutArg->getArgNo()); ++NumOutArgumentsReplaced; Changing = true; } } } while (Changing); if (Replacements.empty()) return false; LLVMContext &Ctx = F.getParent()->getContext(); StructType *NewRetTy = StructType::create(Ctx, ReturnTypes, F.getName()); FunctionType *NewFuncTy = FunctionType::get(NewRetTy, F.getFunctionType()->params(), F.isVarArg()); LLVM_DEBUG(dbgs() << "Computed new return type: " << *NewRetTy << '\n'); Function *NewFunc = Function::Create(NewFuncTy, Function::PrivateLinkage, F.getName() + ".body"); F.getParent()->getFunctionList().insert(F.getIterator(), NewFunc); NewFunc->copyAttributesFrom(&F); NewFunc->setComdat(F.getComdat()); // We want to preserve the function and param attributes, but need to strip // off any return attributes, e.g. zeroext doesn't make sense with a struct. NewFunc->stealArgumentListFrom(F); AttrBuilder RetAttrs; RetAttrs.addAttribute(Attribute::SExt); RetAttrs.addAttribute(Attribute::ZExt); RetAttrs.addAttribute(Attribute::NoAlias); NewFunc->removeAttributes(AttributeList::ReturnIndex, RetAttrs); // TODO: How to preserve metadata? // Move the body of the function into the new rewritten function, and replace // this function with a stub. NewFunc->getBasicBlockList().splice(NewFunc->begin(), F.getBasicBlockList()); for (std::pair<ReturnInst *, ReplacementVec> &Replacement : Replacements) { ReturnInst *RI = Replacement.first; IRBuilder<> B(RI); B.SetCurrentDebugLocation(RI->getDebugLoc()); int RetIdx = 0; Value *NewRetVal = UndefValue::get(NewRetTy); Value *RetVal = RI->getReturnValue(); if (RetVal) NewRetVal = B.CreateInsertValue(NewRetVal, RetVal, RetIdx++); for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second) { Argument *Arg = ReturnPoint.first; Value *Val = ReturnPoint.second; Type *EltTy = Arg->getType()->getPointerElementType(); if (Val->getType() != EltTy) { Type *EffectiveEltTy = EltTy; if (StructType *CT = dyn_cast<StructType>(EltTy)) { assert(CT->getNumElements() == 1); EffectiveEltTy = CT->getElementType(0); } if (DL->getTypeSizeInBits(EffectiveEltTy) != DL->getTypeSizeInBits(Val->getType())) { assert(isVec3ToVec4Shuffle(EffectiveEltTy, Val->getType())); Val = B.CreateShuffleVector(Val, UndefValue::get(Val->getType()), { 0, 1, 2 }); } Val = B.CreateBitCast(Val, EffectiveEltTy); // Re-create single element composite. if (EltTy != EffectiveEltTy) Val = B.CreateInsertValue(UndefValue::get(EltTy), Val, 0); } NewRetVal = B.CreateInsertValue(NewRetVal, Val, RetIdx++); } if (RetVal) RI->setOperand(0, NewRetVal); else { B.CreateRet(NewRetVal); RI->eraseFromParent(); } } SmallVector<Value *, 16> StubCallArgs; for (Argument &Arg : F.args()) { if (OutArgIndexes.count(Arg.getArgNo())) { // It's easier to preserve the type of the argument list. We rely on // DeadArgumentElimination to take care of these. StubCallArgs.push_back(UndefValue::get(Arg.getType())); } else { StubCallArgs.push_back(&Arg); } } BasicBlock *StubBB = BasicBlock::Create(Ctx, "", &F); IRBuilder<> B(StubBB); CallInst *StubCall = B.CreateCall(NewFunc, StubCallArgs); int RetIdx = RetTy->isVoidTy() ? 
0 : 1; for (Argument &Arg : F.args()) { if (!OutArgIndexes.count(Arg.getArgNo())) continue; PointerType *ArgType = cast<PointerType>(Arg.getType()); auto *EltTy = ArgType->getElementType(); unsigned Align = Arg.getParamAlignment(); if (Align == 0) Align = DL->getABITypeAlignment(EltTy); Value *Val = B.CreateExtractValue(StubCall, RetIdx++); Type *PtrTy = Val->getType()->getPointerTo(ArgType->getAddressSpace()); // We can peek through bitcasts, so the type may not match. Value *PtrVal = B.CreateBitCast(&Arg, PtrTy); B.CreateAlignedStore(Val, PtrVal, Align); } if (!RetTy->isVoidTy()) { B.CreateRet(B.CreateExtractValue(StubCall, 0)); } else { B.CreateRetVoid(); } // The function is now a stub we want to inline. F.addFnAttr(Attribute::AlwaysInline); ++NumOutArgumentFunctionsReplaced; return true; }
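The heart of the rewrite above is packing the original return value plus each forwarded out-argument value into the new aggregate return. A reduced sketch of just that step; NewRetTy and Pieces stand in for the pass's computed struct type and per-return value list:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

static void emitStructReturn(ReturnInst *RI, StructType *NewRetTy,
                             ArrayRef<Value *> Pieces) {
  IRBuilder<> B(RI);
  Value *Ret = UndefValue::get(NewRetTy);
  unsigned Idx = 0;
  for (Value *V : Pieces) // One insertvalue per returned piece.
    Ret = B.CreateInsertValue(Ret, V, Idx++);
  B.CreateRet(Ret);       // The new aggregate return replaces the old one...
  RI->eraseFromParent();  // ...which is now dead.
}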