/// This works like CloneAndPruneFunctionInto, except that it does not clone the
/// entire function. Instead it starts at an instruction provided by the caller
/// and copies (and prunes) only the code reachable from that instruction.
///
/// \param NewFunc          Destination function; cloned blocks are appended to it.
/// \param OldFunc          Source function being (partially) cloned.
/// \param StartingInst     First instruction to clone, or null to start at the
///                         entry block (in which case all arguments must
///                         already be mapped in \p VMap).
/// \param VMap             Old-value -> new-value map; updated as we clone.
/// \param ModuleLevelChanges  If true, module-level values may be remapped too.
/// \param Returns          Out-param: every ReturnInst surviving in the clone.
/// \param NameSuffix       Suffix appended to cloned value names; must be non-null.
/// \param CodeInfo         Optional statistics about the cloned code.
void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
                                     const Instruction *StartingInst,
                                     ValueToValueMapTy &VMap,
                                     bool ModuleLevelChanges,
                                     SmallVectorImpl<ReturnInst *> &Returns,
                                     const char *NameSuffix,
                                     ClonedCodeInfo *CodeInfo) {
  assert(NameSuffix && "NameSuffix cannot be null!");

  ValueMapTypeRemapper *TypeMapper = nullptr;
  ValueMaterializer *Materializer = nullptr;

#ifndef NDEBUG
  // If the cloning starts at the beginning of the function, verify that
  // the function arguments are mapped.
  if (!StartingInst)
    for (const Argument &II : OldFunc->args())
      assert(VMap.count(&II) && "No mapping from source argument specified!");
#endif

  PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
                            NameSuffix, CodeInfo);
  const BasicBlock *StartingBB;
  if (StartingInst)
    StartingBB = StartingInst->getParent();
  else {
    StartingBB = &OldFunc->getEntryBlock();
    StartingInst = &StartingBB->front();
  }

  // Clone the entry block, and anything recursively reachable from it.
  // The cloner pushes newly discovered successor blocks onto the worklist.
  std::vector<const BasicBlock*> CloneWorklist;
  PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist);
  while (!CloneWorklist.empty()) {
    const BasicBlock *BB = CloneWorklist.back();
    CloneWorklist.pop_back();
    PFC.CloneBlock(BB, BB->begin(), CloneWorklist);
  }

  // Loop over all of the basic blocks in the old function.  If the block was
  // reachable, we have cloned it and the old block is now in the value map:
  // insert it into the new function in the right order.  If not, ignore it.
  //
  // Defer PHI resolution until rest of function is resolved.
  SmallVector<const PHINode*, 16> PHIToResolve;
  for (const BasicBlock &BI : *OldFunc) {
    Value *V = VMap[&BI];
    BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
    if (!NewBB) continue;  // Dead block.

    // Add the new block to the new function.
    NewFunc->getBasicBlockList().push_back(NewBB);

    // Handle PHI nodes specially, as we have to remove references to dead
    // blocks.
    for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) {
      // PHI nodes may have been remapped to non-PHI nodes by the caller or
      // during the cloning process.
      if (const PHINode *PN = dyn_cast<PHINode>(I)) {
        if (isa<PHINode>(VMap[PN]))
          PHIToResolve.push_back(PN);
        else
          break; // Remapped away; PHIs are a prefix of the block, so stop.
      } else {
        break; // Past the PHI prefix of the block.
      }
    }

    // Finally, remap the terminator instructions, as those can't be remapped
    // until all BBs are mapped.
    RemapInstruction(NewBB->getTerminator(), VMap,
                     ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
                     TypeMapper, Materializer);
  }

  // Defer PHI resolution until rest of function is resolved, PHI resolution
  // requires the CFG to be up-to-date.
  // The outer loop advances one *old block* at a time; the inner loop below
  // consumes all PHIs belonging to that block.
  for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
    const PHINode *OPN = PHIToResolve[phino];
    // NOTE(review): NumPreds is taken from the first PHI of the block and
    // reused for all of them — assumes every PHI in OldBB starts with the
    // same incoming count, which holds for well-formed IR.
    unsigned NumPreds = OPN->getNumIncomingValues();
    const BasicBlock *OldBB = OPN->getParent();
    BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]);

    // Map operands for blocks that are live and remove operands for blocks
    // that are dead.
    for (; phino != PHIToResolve.size() &&
           PHIToResolve[phino]->getParent() == OldBB; ++phino) {
      OPN = PHIToResolve[phino];
      PHINode *PN = cast<PHINode>(VMap[OPN]);
      for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
        Value *V = VMap[PN->getIncomingBlock(pred)];
        if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(V)) {
          Value *InVal = MapValue(PN->getIncomingValue(pred), VMap,
                                  ModuleLevelChanges ? RF_None
                                                     : RF_NoModuleLevelChanges);
          assert(InVal && "Unknown input value?");
          PN->setIncomingValue(pred, InVal);
          PN->setIncomingBlock(pred, MappedBlock);
        } else {
          // Predecessor block was pruned: drop this incoming entry and step
          // back so the entry shifted into this slot is revisited.
          PN->removeIncomingValue(pred, false);
          --pred, --e;  // Revisit the next entry.
        }
      }
    }

    // The loop above has removed PHI entries for those blocks that are dead
    // and has updated others.  However, if a block is live (i.e. copied over)
    // but its terminator has been changed to not go to this block, then our
    // phi nodes will have invalid entries.  Update the PHI nodes in this
    // case.
    PHINode *PN = cast<PHINode>(NewBB->begin());
    NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB));
    if (NumPreds != PN->getNumIncomingValues()) {
      assert(NumPreds < PN->getNumIncomingValues());
      // Count how many times each predecessor comes to this block.
      // Negative counts (from decrements) represent actual CFG edges;
      // increments below add the PHI's view — the surplus stays positive.
      std::map<BasicBlock*, unsigned> PredCount;
      for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
           PI != E; ++PI)
        --PredCount[*PI];

      // Figure out how many entries to remove from each PHI.
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
        ++PredCount[PN->getIncomingBlock(i)];

      // At this point, the excess predecessor entries are positive in the
      // map.  Loop over all of the PHIs and remove excess predecessor
      // entries.
      BasicBlock::iterator I = NewBB->begin();
      for (; (PN = dyn_cast<PHINode>(I)); ++I) {
        for (std::map<BasicBlock*, unsigned>::iterator PCI = PredCount.begin(),
             E = PredCount.end(); PCI != E; ++PCI) {
          BasicBlock *Pred = PCI->first;
          for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove)
            PN->removeIncomingValue(Pred, false);
        }
      }
    }

    // If the loops above have made these phi nodes have 0 or 1 operand,
    // replace them with undef or the input value.  We must do this for
    // correctness, because 0-operand phis are not valid.
    PN = cast<PHINode>(NewBB->begin());
    if (PN->getNumIncomingValues() == 0) {
      BasicBlock::iterator I = NewBB->begin();
      BasicBlock::const_iterator OldI = OldBB->begin();
      while ((PN = dyn_cast<PHINode>(I++))) {
        Value *NV = UndefValue::get(PN->getType());
        PN->replaceAllUsesWith(NV);
        // Keep VMap consistent: the old PHI now maps to the undef value.
        assert(VMap[&*OldI] == PN && "VMap mismatch");
        VMap[&*OldI] = NV;
        PN->eraseFromParent();
        ++OldI;
      }
    }
  }

  // Make a second pass over the PHINodes now that all of them have been
  // remapped into the new function, simplifying the PHINode and performing any
  // recursive simplifications exposed. This will transparently update the
  // WeakVH in the VMap. Notably, we rely on that so that if we coalesce
  // two PHINodes, the iteration over the old PHIs remains valid, and the
  // mapping will just map us to the new node (which may not even be a PHI
  // node).
  for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
    if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]]))
      recursivelySimplifyInstruction(PN);

  // Now that the inlined function body has been fully constructed, go through
  // and zap unconditional fall-through branches. This happens all the time when
  // specializing code: code specialization turns conditional branches into
  // uncond branches, and this code folds them.
  Function::iterator Begin = cast<BasicBlock>(VMap[StartingBB])->getIterator();
  Function::iterator I = Begin;
  while (I != NewFunc->end()) {
    // Check if this block has become dead during inlining or other
    // simplifications. Note that the first block will appear dead, as it has
    // not yet been wired up properly.
    if (I != Begin && (pred_begin(&*I) == pred_end(&*I) ||
                       I->getSinglePredecessor() == &*I)) {
      BasicBlock *DeadBB = &*I++; // Advance before deleting to keep I valid.
      DeleteDeadBlock(DeadBB);
      continue;
    }

    // We need to simplify conditional branches and switches with a constant
    // operand. We try to prune these out when cloning, but if the
    // simplification required looking through PHI nodes, those are only
    // available after forming the full basic block. That may leave some here,
    // and we still want to prune the dead code as early as possible.
    ConstantFoldTerminator(&*I);

    BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
    if (!BI || BI->isConditional()) { ++I; continue; }

    BasicBlock *Dest = BI->getSuccessor(0);
    if (!Dest->getSinglePredecessor()) {
      ++I; continue;
    }

    // We shouldn't be able to get single-entry PHI nodes here, as instsimplify
    // above should have zapped all of them..
    assert(!isa<PHINode>(Dest->begin()));

    // We know all single-entry PHI nodes in the inlined function have been
    // removed, so we just need to splice the blocks.
    BI->eraseFromParent();

    // Make all PHI nodes that referred to Dest now refer to I as their source.
    Dest->replaceAllUsesWith(&*I);

    // Move all the instructions in the succ to the pred.
    I->getInstList().splice(I->end(), Dest->getInstList());

    // Remove the dest block.
    Dest->eraseFromParent();

    // Do not increment I, iteratively merge all things this block branches to.
  }

  // Make a final pass over the basic blocks from the old function to gather
  // any return instructions which survived folding. We have to do this here
  // because we can iteratively remove and merge returns above.
  for (Function::iterator I = cast<BasicBlock>(VMap[StartingBB])->getIterator(),
                          E = NewFunc->end();
       I != E; ++I)
    if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator()))
      Returns.push_back(RI);
}
// Clone OldFunc into NewFunc, transforming the old arguments into references to
// VMap values.
//
// Preconditions: every argument of OldFunc must already have an entry in VMap
// (checked under NDEBUG-off builds below). NameSuffix must be non-null.
// On return, Returns holds the cloned ReturnInsts for the caller's use.
//
void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
                             ValueToValueMapTy &VMap,
                             bool ModuleLevelChanges,
                             SmallVectorImpl<ReturnInst*> &Returns,
                             const char *NameSuffix, ClonedCodeInfo *CodeInfo,
                             ValueMapTypeRemapper *TypeMapper,
                             ValueMaterializer *Materializer) {
  assert(NameSuffix && "NameSuffix cannot be null!");

#ifndef NDEBUG
  for (const Argument &I : OldFunc->args())
    assert(VMap.count(&I) && "No mapping from source argument specified!");
#endif

  // Copy all attributes other than those stored in the AttributeSet.  We need
  // to remap the parameter indices of the AttributeSet.
  // copyAttributesFrom() clobbers the attribute list, so save NewFunc's
  // attributes first and restore them right after.
  AttributeSet NewAttrs = NewFunc->getAttributes();
  NewFunc->copyAttributesFrom(OldFunc);
  NewFunc->setAttributes(NewAttrs);

  // Fix up the personality function that got copied over.
  if (OldFunc->hasPersonalityFn())
    NewFunc->setPersonalityFn(
        MapValue(OldFunc->getPersonalityFn(), VMap,
                 ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
                 TypeMapper, Materializer));

  AttributeSet OldAttrs = OldFunc->getAttributes();
  // Clone any argument attributes that are present in the VMap.
  // Note: getParamAttributes() is 1-based, hence getArgNo() + 1.
  for (const Argument &OldArg : OldFunc->args())
    if (Argument *NewArg = dyn_cast<Argument>(VMap[&OldArg])) {
      AttributeSet attrs = OldAttrs.getParamAttributes(OldArg.getArgNo() + 1);
      if (attrs.getNumSlots() > 0)
        NewArg->addAttr(attrs);
    }

  // Carry over return and function-level attributes.
  NewFunc->setAttributes(
      NewFunc->getAttributes()
          .addAttributes(NewFunc->getContext(), AttributeSet::ReturnIndex,
                         OldAttrs.getRetAttributes())
          .addAttributes(NewFunc->getContext(), AttributeSet::FunctionIndex,
                         OldAttrs.getFnAttributes()));

  // Clone function-level metadata, remapping any values it references.
  SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
  OldFunc->getAllMetadata(MDs);
  for (auto MD : MDs)
    NewFunc->addMetadata(
        MD.first,
        *MapMetadata(MD.second, VMap,
                     ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
                     TypeMapper, Materializer));

  // Loop over all of the basic blocks in the function, cloning them as
  // appropriate.  Note that we save BE this way in order to handle cloning of
  // recursive functions into themselves.
  //
  for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
       BI != BE; ++BI) {
    const BasicBlock &BB = *BI;

    // Create a new basic block and copy instructions into it!
    BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo);

    // Add basic block mapping.
    VMap[&BB] = CBB;

    // It is only legal to clone a function if a block address within that
    // function is never referenced outside of the function.  Given that, we
    // want to map block addresses from the old function to block addresses in
    // the clone. (This is different from the generic ValueMapper
    // implementation, which generates an invalid blockaddress when
    // cloning a function.)
    if (BB.hasAddressTaken()) {
      Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
                                              const_cast<BasicBlock*>(&BB));
      VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB);
    }

    // Note return instructions for the caller.
    if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
      Returns.push_back(RI);
  }

  // Loop over all of the instructions in the function, fixing up operand
  // references as we go.  This uses VMap to do all the hard work.
  // We start from the clone of OldFunc's entry block so that blocks NewFunc
  // may already have contained before the clone are left untouched.
  for (Function::iterator
           BB = cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(),
           BE = NewFunc->end();
       BB != BE; ++BB)
    // Loop over all instructions, fixing each one as we find it...
    for (Instruction &II : *BB)
      RemapInstruction(&II, VMap,
                       ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
                       TypeMapper, Materializer);
}
/// Rewrite function F so that each call site in AsyncCalls can suspend and
/// later resume via a generated "__async_cb" callback function.
///
/// The transformation runs in four passes over F (the passes are ordered and
/// must not be merged — later passes depend on the IR produced by earlier
/// ones):
///   Pass 0: record the original returns of F.
///   Pass 1: split each async call site and build the save-context/return
///           skeleton around it.
///   Pass 2: compute the set of live values ("context variables") that must
///           survive a suspension, size the context struct, and fill in each
///           SaveAsyncCtxBlock.
///   Pass 3: clone F once per async call site into its callback function and
///           patch the clone (entry block, return type, context reloads).
///   Pass 4: patch the original F only (free the context on the sync path).
void LowerEmAsyncify::transformAsyncFunction(Function &F, Instructions const& AsyncCalls) {
  assert(!AsyncCalls.empty());

  // Pass 0
  // collect all the return instructions from the original function
  // will use later
  std::vector<ReturnInst*> OrigReturns;
  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
    if (ReturnInst *RI = dyn_cast<ReturnInst>(&*I)) {
      OrigReturns.push_back(RI);
    }
  }

  // Pass 1
  // Scan each async call and make the basic structure:
  // All these will be cloned into the callback functions
  // - allocate the async context before calling an async function
  // - check async right after calling an async function, save context & return if async, continue if not
  // - retrieve the async return value and free the async context if the called function turns out to be sync
  std::vector<AsyncCallEntry> AsyncCallEntries;
  AsyncCallEntries.reserve(AsyncCalls.size());
  for (Instructions::const_iterator I = AsyncCalls.begin(), E = AsyncCalls.end(); I != E; ++I) {
    // prepare blocks
    Instruction *CurAsyncCall = *I;

    // The block containing the async call
    BasicBlock *CurBlock = CurAsyncCall->getParent();
    // The block should run after the async call
    BasicBlock *AfterCallBlock = SplitBlock(CurBlock, CurAsyncCall->getNextNode());
    // The block where we store the context and return
    BasicBlock *SaveAsyncCtxBlock = BasicBlock::Create(TheModule->getContext(), "SaveAsyncCtx", &F, AfterCallBlock);
    // return a dummy value at the end, to make the block valid
    // (replaced with real code in Pass 2)
    new UnreachableInst(TheModule->getContext(), SaveAsyncCtxBlock);

    // allocate the context before making the call
    // we don't know the size yet, will fix it later (Pass 2)
    // we cannot insert the instruction later because,
    // we need to make sure that all the instructions and blocks are fixed before we can generate DT and find context variables
    // In CallHandler.h `sp` will be put as the second parameter
    // such that we can take a note of the original sp
    CallInst *AllocAsyncCtxInst = CallInst::Create(AllocAsyncCtxFunction, Constant::getNullValue(I32), "AsyncCtx", CurAsyncCall);

    // Right after the call
    // check async and return if so
    // TODO: we can define truly async functions and partial async functions
    {
      // remove old terminator, which came from SplitBlock
      CurBlock->getTerminator()->eraseFromParent();
      // go to SaveAsyncCtxBlock if the previous call is async
      // otherwise just continue to AfterCallBlock
      CallInst *CheckAsync = CallInst::Create(CheckAsyncFunction, "IsAsync", CurBlock);
      BranchInst::Create(SaveAsyncCtxBlock, AfterCallBlock, CheckAsync, CurBlock);
    }

    // take a note of this async call
    AsyncCallEntry CurAsyncCallEntry;
    CurAsyncCallEntry.AsyncCallInst = CurAsyncCall;
    CurAsyncCallEntry.AfterCallBlock = AfterCallBlock;
    CurAsyncCallEntry.AllocAsyncCtxInst = AllocAsyncCtxInst;
    CurAsyncCallEntry.SaveAsyncCtxBlock = SaveAsyncCtxBlock;
    // create an empty function for the callback, which will be constructed later
    CurAsyncCallEntry.CallbackFunc = Function::Create(CallbackFunctionType, F.getLinkage(), F.getName() + "__async_cb", TheModule);
    AsyncCallEntries.push_back(CurAsyncCallEntry);
  }

  // Pass 2
  // analyze the context variables and construct SaveAsyncCtxBlock for each async call
  // also calculate the size of the context and allocate the async context accordingly
  for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) {
    AsyncCallEntry & CurEntry = *EI;

    // Collect everything to be saved
    FindContextVariables(CurEntry);

    // Pack the variables as a struct
    {
      // TODO: sort them from large member to small ones, in order to make the struct compact even when aligned
      // Slot 0 of the context struct is reserved for the callback pointer.
      SmallVector<Type*, 8> Types;
      Types.push_back(CallbackFunctionType->getPointerTo());
      for (Values::iterator VI = CurEntry.ContextVariables.begin(), VE = CurEntry.ContextVariables.end(); VI != VE; ++VI) {
        Types.push_back((*VI)->getType());
      }
      CurEntry.ContextStructType = StructType::get(TheModule->getContext(), Types);
    }

    // fix the size of allocation (was a null/0 placeholder from Pass 1)
    CurEntry.AllocAsyncCtxInst->setOperand(0,
        ConstantInt::get(I32, DL->getTypeStoreSize(CurEntry.ContextStructType)));

    // construct SaveAsyncCtxBlock
    {
      // fill in SaveAsyncCtxBlock
      // temporarily remove the terminator for convenience
      CurEntry.SaveAsyncCtxBlock->getTerminator()->eraseFromParent();
      assert(CurEntry.SaveAsyncCtxBlock->empty());

      Type *AsyncCtxAddrTy = CurEntry.ContextStructType->getPointerTo();
      BitCastInst *AsyncCtxAddr = new BitCastInst(CurEntry.AllocAsyncCtxInst, AsyncCtxAddrTy, "AsyncCtxAddr", CurEntry.SaveAsyncCtxBlock);
      SmallVector<Value*, 2> Indices;
      // store the callback
      {
        Indices.push_back(ConstantInt::get(I32, 0));
        Indices.push_back(ConstantInt::get(I32, 0));
        GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(AsyncCtxAddrTy, AsyncCtxAddr, Indices, "", CurEntry.SaveAsyncCtxBlock);
        new StoreInst(CurEntry.CallbackFunc, AsyncVarAddr, CurEntry.SaveAsyncCtxBlock);
      }

      // store the context variables
      for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) {
        Indices.clear();
        Indices.push_back(ConstantInt::get(I32, 0));
        Indices.push_back(ConstantInt::get(I32, i + 1)); // the 0th element is the callback function
        GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(AsyncCtxAddrTy, AsyncCtxAddr, Indices, "", CurEntry.SaveAsyncCtxBlock);
        new StoreInst(CurEntry.ContextVariables[i], AsyncVarAddr, CurEntry.SaveAsyncCtxBlock);
      }

      // to exit the block, we want to return without unwinding the stack frame
      CallInst::Create(DoNotUnwindFunction, "", CurEntry.SaveAsyncCtxBlock);
      ReturnInst::Create(TheModule->getContext(),
          (F.getReturnType()->isVoidTy() ? 0 : Constant::getNullValue(F.getReturnType())),
          CurEntry.SaveAsyncCtxBlock);
    }
  }

  // Pass 3
  // now all the SaveAsyncCtxBlock's have been constructed
  // we can clone F and construct callback functions
  // we could not construct the callbacks in Pass 2 because we need _all_ those SaveAsyncCtxBlock's appear in _each_ callback
  for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) {
    AsyncCallEntry & CurEntry = *EI;

    Function *CurCallbackFunc = CurEntry.CallbackFunc;
    ValueToValueMapTy VMap;

    // Add the entry block
    // load variables from the context
    // also update VMap for CloneFunction
    BasicBlock *EntryBlock = BasicBlock::Create(TheModule->getContext(), "AsyncCallbackEntry", CurCallbackFunc);
    std::vector<LoadInst *> LoadedAsyncVars;
    {
      Type *AsyncCtxAddrTy = CurEntry.ContextStructType->getPointerTo();
      // The callback's sole argument is the raw async context pointer.
      BitCastInst *AsyncCtxAddr = new BitCastInst(CurCallbackFunc->arg_begin(), AsyncCtxAddrTy, "AsyncCtx", EntryBlock);
      SmallVector<Value*, 2> Indices;
      for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) {
        Indices.clear();
        Indices.push_back(ConstantInt::get(I32, 0));
        Indices.push_back(ConstantInt::get(I32, i + 1)); // the 0th element of AsyncCtx is the callback function
        GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(AsyncCtxAddrTy, AsyncCtxAddr, Indices, "", EntryBlock);
        LoadedAsyncVars.push_back(new LoadInst(AsyncVarAddr, "", EntryBlock));
        // we want the argument to be replaced by the loaded value
        if (isa<Argument>(CurEntry.ContextVariables[i]))
          VMap[CurEntry.ContextVariables[i]] = LoadedAsyncVars.back();
      }
    }

    // we don't need any argument, just leave dummy entries there to cheat CloneFunctionInto
    for (Function::const_arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) {
      if (VMap.count(AI) == 0)
        VMap[AI] = Constant::getNullValue(AI->getType());
    }

    // Clone the function
    {
      SmallVector<ReturnInst*, 8> Returns;
      CloneFunctionInto(CurCallbackFunc, &F, VMap, false, Returns);

      // return type of the callback functions is always void
      // need to fix the return type
      if (!F.getReturnType()->isVoidTy()) {
        // for those return instructions that are from the original function
        // it means we are 'truly' leaving this function
        // need to store the return value right before return
        for (size_t i = 0; i < OrigReturns.size(); ++i) {
          ReturnInst *RI = cast<ReturnInst>(VMap[OrigReturns[i]]);
          // Need to store the return value into the global area
          CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", RI);
          BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, F.getReturnType()->getPointerTo(), "AsyncRetValAddr", RI);
          new StoreInst(RI->getOperand(0), RetValAddr, RI);
        }
        // we want to unwind the stack back to where it was before the original function was called
        // but we don't actually need to do this here
        // at this point it must be true that no callback is pended
        // so the scheduler will correct the stack pointer and pop the frame
        // here we just fix the return type
        for (size_t i = 0; i < Returns.size(); ++i) {
          ReplaceInstWithInst(Returns[i], ReturnInst::Create(TheModule->getContext()));
        }
      }
    }

    // the callback function does not have any return value
    // so clear all the attributes for return
    {
      AttributeSet Attrs = CurCallbackFunc->getAttributes();
      CurCallbackFunc->setAttributes(
        Attrs.removeAttributes(TheModule->getContext(), AttributeSet::ReturnIndex, Attrs.getRetAttributes())
      );
    }

    // in the callback function, we never allocate a new async frame
    // instead we reuse the existing one
    for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) {
      Instruction *I = cast<Instruction>(VMap[EI->AllocAsyncCtxInst]);
      ReplaceInstWithInst(I, CallInst::Create(ReallocAsyncCtxFunction, I->getOperand(0), "ReallocAsyncCtx"));
    }

    // mapped entry point & async call
    BasicBlock *ResumeBlock = cast<BasicBlock>(VMap[CurEntry.AfterCallBlock]);
    Instruction *MappedAsyncCall = cast<Instruction>(VMap[CurEntry.AsyncCallInst]);

    // To save space, for each async call in the callback function, we just ignore the sync case, and leave it to the scheduler
    // TODO need an option for this
    {
      // NOTE: the inner-loop CurEntry deliberately shadows the outer one; it
      // walks *every* async call site inside this particular callback clone.
      for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) {
        AsyncCallEntry & CurEntry = *EI;
        Instruction *MappedAsyncCallInst = cast<Instruction>(VMap[CurEntry.AsyncCallInst]);
        BasicBlock *MappedAsyncCallBlock = MappedAsyncCallInst->getParent();
        BasicBlock *MappedAfterCallBlock = cast<BasicBlock>(VMap[CurEntry.AfterCallBlock]);

        // for the sync case of the call, go to NewBlock (instead of MappedAfterCallBlock)
        BasicBlock *NewBlock = BasicBlock::Create(TheModule->getContext(), "", CurCallbackFunc, MappedAfterCallBlock);
        // Successor 1 is the "sync" (false) edge of the IsAsync branch built in Pass 1.
        MappedAsyncCallBlock->getTerminator()->setSuccessor(1, NewBlock);
        // store the return value
        if (!MappedAsyncCallInst->use_empty()) {
          CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", NewBlock);
          BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, MappedAsyncCallInst->getType()->getPointerTo(), "AsyncRetValAddr", NewBlock);
          new StoreInst(MappedAsyncCallInst, RetValAddr, NewBlock);
        }
        // tell the scheduler that we want to keep the current async stack frame
        CallInst::Create(DoNotUnwindAsyncFunction, "", NewBlock);
        // finally we go to the SaveAsyncCtxBlock, to register the callback, save the local variables and leave
        BasicBlock *MappedSaveAsyncCtxBlock = cast<BasicBlock>(VMap[CurEntry.SaveAsyncCtxBlock]);
        BranchInst::Create(MappedSaveAsyncCtxBlock, NewBlock);
      }
    }

    std::vector<AllocaInst*> ToPromote;
    // applying loaded variables in the entry block
    {
      BasicBlockSet ReachableBlocks = FindReachableBlocksFrom(ResumeBlock);
      for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) {
        Value *OrigVar = CurEntry.ContextVariables[i];
        if (isa<Argument>(OrigVar)) continue; // already processed
        Value *CurVar = VMap[OrigVar];
        assert(CurVar != MappedAsyncCall);
        if (Instruction *Inst = dyn_cast<Instruction>(CurVar)) {
          if (ReachableBlocks.count(Inst->getParent())) {
            // Inst could be either defined or loaded from the async context
            // Do the dirty work in memory
            // TODO: might need to check the safety first
            // TODO: can we create phi directly?
            AllocaInst *Addr = DemoteRegToStack(*Inst, false);
            new StoreInst(LoadedAsyncVars[i], Addr, EntryBlock);
            ToPromote.push_back(Addr);
          } else {
            // The parent block is not reachable, which means there is no conflict
            // it's safe to replace Inst with the loaded value
            assert(Inst != LoadedAsyncVars[i]); // this should only happen when OrigVar is an Argument
            Inst->replaceAllUsesWith(LoadedAsyncVars[i]);
          }
        }
      }
    }

    // resolve the return value of the previous async function
    // it could be the value just loaded from the global area
    // or directly returned by the function (in its sync case)
    if (!CurEntry.AsyncCallInst->use_empty()) {
      // load the async return value
      CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", EntryBlock);
      BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, MappedAsyncCall->getType()->getPointerTo(), "AsyncRetValAddr", EntryBlock);
      LoadInst *RetVal = new LoadInst(RetValAddr, "AsyncRetVal", EntryBlock);

      AllocaInst *Addr = DemoteRegToStack(*MappedAsyncCall, false);
      new StoreInst(RetVal, Addr, EntryBlock);
      ToPromote.push_back(Addr);
    }

    // TODO remove unreachable blocks before creating phi

    // We go right to ResumeBlock from the EntryBlock
    BranchInst::Create(ResumeBlock, EntryBlock);

    /*
     * Creating phi's
     * Normal stack frames and async stack frames are interleaving with each other.
     * In a callback function, if we call an async function, we might need to realloc the async ctx.
     * at this point we don't want anything stored after the ctx,
     * such that we can free and extend the ctx by simply updating STACKTOP.
     * Therefore we don't want any alloca's in callback functions.
     */
    if (!ToPromote.empty()) {
      DominatorTreeWrapperPass DTW;
      DTW.runOnFunction(*CurCallbackFunc);
      PromoteMemToReg(ToPromote, DTW.getDomTree());
    }

    removeUnreachableBlocks(*CurCallbackFunc);
  }

  // Pass 4
  // Here are modifications to the original function, which we won't want to be cloned into the callback functions
  for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) {
    AsyncCallEntry & CurEntry = *EI;
    // remove the frame if no async function has been called
    CallInst::Create(FreeAsyncCtxFunction, CurEntry.AllocAsyncCtxInst, "", CurEntry.AfterCallBlock->getFirstNonPHI());
  }
}
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
/// except that it does some simple constant prop and DCE on the fly.  The
/// effect of this is to copy significantly less code in cases where (for
/// example) a function call with constant arguments is inlined, and those
/// constant arguments cause a significant amount of code in the callee to be
/// dead.  Since this doesn't produce an exact copy of the input, it can't be
/// used for things like CloneFunction or CloneModule.
///
/// \p TheCall, if non-null, supplies the "dbg" metadata of the call being
/// inlined so cloned instructions can have their inlined-at info updated.
void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
                                     ValueToValueMapTy &VMap,
                                     SmallVectorImpl<ReturnInst*> &Returns,
                                     const char *NameSuffix,
                                     ClonedCodeInfo *CodeInfo,
                                     const TargetData *TD,
                                     Instruction *TheCall) {
  assert(NameSuffix && "NameSuffix cannot be null!");

#ifndef NDEBUG
  for (Function::const_arg_iterator II = OldFunc->arg_begin(),
       E = OldFunc->arg_end(); II != E; ++II)
    assert(VMap.count(II) && "No mapping from source argument specified!");
#endif

  PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, Returns,
                            NameSuffix, CodeInfo, TD);

  // Clone the entry block, and anything recursively reachable from it.
  std::vector<const BasicBlock*> CloneWorklist;
  CloneWorklist.push_back(&OldFunc->getEntryBlock());
  while (!CloneWorklist.empty()) {
    const BasicBlock *BB = CloneWorklist.back();
    CloneWorklist.pop_back();
    PFC.CloneBlock(BB, CloneWorklist);
  }

  // Loop over all of the basic blocks in the old function.  If the block was
  // reachable, we have cloned it and the old block is now in the value map:
  // insert it into the new function in the right order.  If not, ignore it.
  //
  // Defer PHI resolution until rest of function is resolved.
  SmallVector<const PHINode*, 16> PHIToResolve;
  for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
       BI != BE; ++BI) {
    BasicBlock *NewBB = cast_or_null<BasicBlock>(VMap[BI]);
    if (NewBB == 0) continue;  // Dead block.

    // Add the new block to the new function.
    NewFunc->getBasicBlockList().push_back(NewBB);

    // Loop over all of the instructions in the block, fixing up operand
    // references as we go.  This uses VMap to do all the hard work.
    //
    BasicBlock::iterator I = NewBB->begin();

    // Debug info of the inlined call site, if any; used to set the
    // inlined-at chain on cloned instructions below.
    unsigned DbgKind = OldFunc->getContext().getMDKindID("dbg");
    MDNode *TheCallMD = NULL;
    if (TheCall && TheCall->hasMetadata())
      TheCallMD = TheCall->getMetadata(DbgKind);

    // Handle PHI nodes specially, as we have to remove references to dead
    // blocks.
    if (PHINode *PN = dyn_cast<PHINode>(I)) {
      // Skip over all PHI nodes, remembering them for later.
      BasicBlock::const_iterator OldI = BI->begin();
      for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) {
        if (I->hasMetadata()) {
          if (TheCallMD) {
            if (MDNode *IMD = I->getMetadata(DbgKind)) {
              MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD);
              I->setMetadata(DbgKind, NewMD);
            }
          } else {
            // The cloned instruction has dbg info but the call instruction
            // does not have dbg info. Remove dbg info from cloned instruction.
            I->setMetadata(DbgKind, 0);
          }
        }
        PHIToResolve.push_back(cast<PHINode>(OldI));
      }
    }

    // FIXME:
    // FIXME:
    // FIXME: Unclone all this metadata stuff.
    // FIXME:
    // FIXME:

    // Otherwise, remap the rest of the instructions normally.
    for (; I != NewBB->end(); ++I) {
      if (I->hasMetadata()) {
        if (TheCallMD) {
          if (MDNode *IMD = I->getMetadata(DbgKind)) {
            MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD);
            I->setMetadata(DbgKind, NewMD);
          }
        } else {
          // The cloned instruction has dbg info but the call instruction
          // does not have dbg info. Remove dbg info from cloned instruction.
          I->setMetadata(DbgKind, 0);
        }
      }
      RemapInstruction(I, VMap);
    }
  }

  // Defer PHI resolution until rest of function is resolved, PHI resolution
  // requires the CFG to be up-to-date.
  for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) {
    const PHINode *OPN = PHIToResolve[phino];
    // NOTE(review): NumPreds comes from the first PHI of the block and is
    // reused for all PHIs in that block — valid for well-formed IR, where
    // every PHI in a block has one entry per predecessor.
    unsigned NumPreds = OPN->getNumIncomingValues();
    const BasicBlock *OldBB = OPN->getParent();
    BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]);

    // Map operands for blocks that are live and remove operands for blocks
    // that are dead.
    for (; phino != PHIToResolve.size() &&
           PHIToResolve[phino]->getParent() == OldBB; ++phino) {
      OPN = PHIToResolve[phino];
      PHINode *PN = cast<PHINode>(VMap[OPN]);
      for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
        if (BasicBlock *MappedBlock =
              cast_or_null<BasicBlock>(VMap[PN->getIncomingBlock(pred)])) {
          Value *InVal = MapValue(PN->getIncomingValue(pred), VMap);
          assert(InVal && "Unknown input value?");
          PN->setIncomingValue(pred, InVal);
          PN->setIncomingBlock(pred, MappedBlock);
        } else {
          // Predecessor was pruned as dead: drop the entry and step back so
          // the entry shifted into this slot is revisited.
          PN->removeIncomingValue(pred, false);
          --pred, --e;  // Revisit the next entry.
        }
      }
    }

    // The loop above has removed PHI entries for those blocks that are dead
    // and has updated others.  However, if a block is live (i.e. copied over)
    // but its terminator has been changed to not go to this block, then our
    // phi nodes will have invalid entries.  Update the PHI nodes in this
    // case.
    PHINode *PN = cast<PHINode>(NewBB->begin());
    NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB));
    if (NumPreds != PN->getNumIncomingValues()) {
      assert(NumPreds < PN->getNumIncomingValues());
      // Count how many times each predecessor comes to this block.
      // After the two loops the map holds (PHI entries) - (CFG edges), i.e.
      // the surplus entries to delete per predecessor.
      std::map<BasicBlock*, unsigned> PredCount;
      for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB);
           PI != E; ++PI)
        --PredCount[*PI];

      // Figure out how many entries to remove from each PHI.
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
        ++PredCount[PN->getIncomingBlock(i)];

      // At this point, the excess predecessor entries are positive in the
      // map.  Loop over all of the PHIs and remove excess predecessor
      // entries.
      BasicBlock::iterator I = NewBB->begin();
      for (; (PN = dyn_cast<PHINode>(I)); ++I) {
        for (std::map<BasicBlock*, unsigned>::iterator PCI = PredCount.begin(),
             E = PredCount.end(); PCI != E; ++PCI) {
          BasicBlock *Pred = PCI->first;
          for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove)
            PN->removeIncomingValue(Pred, false);
        }
      }
    }

    // If the loops above have made these phi nodes have 0 or 1 operand,
    // replace them with undef or the input value.  We must do this for
    // correctness, because 0-operand phis are not valid.
    PN = cast<PHINode>(NewBB->begin());
    if (PN->getNumIncomingValues() == 0) {
      BasicBlock::iterator I = NewBB->begin();
      BasicBlock::const_iterator OldI = OldBB->begin();
      while ((PN = dyn_cast<PHINode>(I++))) {
        Value *NV = UndefValue::get(PN->getType());
        PN->replaceAllUsesWith(NV);
        // Keep VMap consistent: the old PHI now maps to undef.
        assert(VMap[OldI] == PN && "VMap mismatch");
        VMap[OldI] = NV;
        PN->eraseFromParent();
        ++OldI;
      }
    }
    // NOTE: We cannot eliminate single entry phi nodes here, because of
    // VMap.  Single entry phi nodes can have multiple VMap entries
    // pointing at them.  Thus, deleting one would require scanning the VMap
    // to update any entries in it that would require that.  This would be
    // really slow.
  }

  // Now that the inlined function body has been fully constructed, go through
  // and zap unconditional fall-through branches.  This happen all the time when
  // specializing code: code specialization turns conditional branches into
  // uncond branches, and this code folds them.
  Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
  while (I != NewFunc->end()) {
    BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
    if (!BI || BI->isConditional()) { ++I; continue; }

    // Note that we can't eliminate uncond branches if the destination has
    // single-entry PHI nodes.  Eliminating the single-entry phi nodes would
    // require scanning the VMap to update any entries that point to the phi
    // node.
    BasicBlock *Dest = BI->getSuccessor(0);
    if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) {
      ++I; continue;
    }

    // We know all single-entry PHI nodes in the inlined function have been
    // removed, so we just need to splice the blocks.
    BI->eraseFromParent();

    // Move all the instructions in the succ to the pred.
    I->getInstList().splice(I->end(), Dest->getInstList());

    // Make all PHI nodes that referred to Dest now refer to I as their source.
    Dest->replaceAllUsesWith(I);

    // Remove the dest block.
    Dest->eraseFromParent();

    // Do not increment I, iteratively merge all things this block branches to.
  }
}
// Clone OldFunc into NewFunc, transforming the old arguments into references to
// VMap values.  On entry, VMap must already map every argument of OldFunc to
// its replacement value (asserted below in !NDEBUG builds); on exit it
// additionally maps every old basic block to its clone.  Returns (all
// ReturnInsts of the clone) are collected into the Returns vector for the
// caller.  ModuleLevelChanges selects whether module-level values are remapped
// (RF_None) or left alone (RF_NoModuleLevelChanges) when fixing up operands.
//
void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
                             ValueToValueMapTy &VMap,
                             bool ModuleLevelChanges,
                             SmallVectorImpl<ReturnInst*> &Returns,
                             const char *NameSuffix, ClonedCodeInfo *CodeInfo,
                             ValueMapTypeRemapper *TypeMapper) {
  assert(NameSuffix && "NameSuffix cannot be null!");

#ifndef NDEBUG
  // Every source argument must already have a mapping before cloning begins.
  for (Function::const_arg_iterator I = OldFunc->arg_begin(),
       E = OldFunc->arg_end(); I != E; ++I)
    assert(VMap.count(I) && "No mapping from source argument specified!");
#endif

  // Clone any attributes.
  if (NewFunc->arg_size() == OldFunc->arg_size())
    // Same arity: the whole attribute set can be copied wholesale.
    NewFunc->copyAttributesFrom(OldFunc);
  else {
    // Some arguments were deleted via the VMap.  Copy the per-argument
    // attributes one by one, only for arguments that survived (i.e. whose
    // VMap entry still is an Argument of the new function).
    for (Function::const_arg_iterator I = OldFunc->arg_begin(),
           E = OldFunc->arg_end(); I != E; ++I)
      if (Argument* Anew = dyn_cast<Argument>(VMap[I]))
        // NOTE(review): attribute indices here are 1-based for parameters
        // (index 0 is the return value), hence the "+ 1".
        Anew->addAttr( OldFunc->getAttributes()
                       .getParamAttributes(I->getArgNo() + 1));
    // Index 0 carries the return-value attributes...
    NewFunc->setAttributes(NewFunc->getAttributes()
                           .addAttr(0, OldFunc->getAttributes()
                                       .getRetAttributes()));
    // ...and index ~0 carries the function-level attributes.
    NewFunc->setAttributes(NewFunc->getAttributes()
                           .addAttr(~0, OldFunc->getAttributes()
                                        .getFnAttributes()));
  }

  // Loop over all of the basic blocks in the function, cloning them as
  // appropriate.  Note that we save BE this way in order to handle cloning of
  // recursive functions into themselves.
  //
  for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
       BI != BE; ++BI) {
    const BasicBlock &BB = *BI;

    // Create a new basic block and copy instructions into it!
    BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo);

    // Add basic block mapping.
    VMap[&BB] = CBB;

    // It is only legal to clone a function if a block address within that
    // function is never referenced outside of the function.  Given that, we
    // want to map block addresses from the old function to block addresses in
    // the clone.  (This is different from the generic ValueMapper
    // implementation, which generates an invalid blockaddress when
    // cloning a function.)
    if (BB.hasAddressTaken()) {
      Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
                                              const_cast<BasicBlock*>(&BB));
      VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB);
    }

    // Note return instructions for the caller.
    if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
      Returns.push_back(RI);
  }

  // Loop over all of the instructions in the function, fixing up operand
  // references as we go.  This uses VMap to do all the hard work.  This must
  // run after all blocks are cloned so that forward references resolve.
  for (Function::iterator BB = cast<BasicBlock>(VMap[OldFunc->begin()]),
         BE = NewFunc->end(); BB != BE; ++BB)
    // Loop over all instructions, fixing each one as we find it...
    for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
      RemapInstruction(II, VMap,
                       ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
                       TypeMapper);
}
// Clone OldFunc into NewFunc, transforming the old arguments into references to
// VMap values.  On entry, VMap must already map every argument of OldFunc
// (asserted below in !NDEBUG builds); on exit it additionally maps every old
// basic block to its clone.  All ReturnInsts of the clone are appended to
// Returns.  ModuleLevelChanges selects RF_None vs. RF_NoModuleLevelChanges for
// all value/metadata remapping; TypeMapper and Materializer, if non-null, are
// threaded through to MapValue/MapMetadata/RemapInstruction.
//
void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
                             ValueToValueMapTy &VMap,
                             bool ModuleLevelChanges,
                             SmallVectorImpl<ReturnInst*> &Returns,
                             const char *NameSuffix, ClonedCodeInfo *CodeInfo,
                             ValueMapTypeRemapper *TypeMapper,
                             ValueMaterializer *Materializer) {
  assert(NameSuffix && "NameSuffix cannot be null!");

#ifndef NDEBUG
  // Every source argument must already have a mapping before cloning begins.
  for (const Argument &I : OldFunc->args())
    assert(VMap.count(&I) && "No mapping from source argument specified!");
#endif

  // Copy all attributes other than those stored in the AttributeList.  We need
  // to remap the parameter indices of the AttributeList.  copyAttributesFrom
  // would clobber NewFunc's list, so save it and restore it afterwards.
  AttributeList NewAttrs = NewFunc->getAttributes();
  NewFunc->copyAttributesFrom(OldFunc);
  NewFunc->setAttributes(NewAttrs);

  // Fix up the personality function that got copied over.
  if (OldFunc->hasPersonalityFn())
    NewFunc->setPersonalityFn(
        MapValue(OldFunc->getPersonalityFn(), VMap,
                 ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
                 TypeMapper, Materializer));

  SmallVector<AttributeSet, 4> NewArgAttrs(NewFunc->arg_size());
  AttributeList OldAttrs = OldFunc->getAttributes();

  // Clone any argument attributes that are present in the VMap.  Arguments
  // whose VMap entry is no longer an Argument (e.g. deleted parameters) are
  // skipped, so their attribute slot stays empty.
  for (const Argument &OldArg : OldFunc->args()) {
    if (Argument *NewArg = dyn_cast<Argument>(VMap[&OldArg])) {
      NewArgAttrs[NewArg->getArgNo()] =
          OldAttrs.getParamAttributes(OldArg.getArgNo());
    }
  }

  // Rebuild the full AttributeList with the (possibly re-indexed) parameter
  // attributes, preserving the old function's fn- and return-attributes.
  NewFunc->setAttributes(
      AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttributes(),
                         OldAttrs.getRetAttributes(), NewArgAttrs));

  // Cloning into the same module requires cloning the DISubprogram too,
  // which is only legal when module-level changes are being made.
  bool MustCloneSP =
      OldFunc->getParent() && OldFunc->getParent() == NewFunc->getParent();
  DISubprogram *SP = OldFunc->getSubprogram();
  if (SP) {
    assert(!MustCloneSP || ModuleLevelChanges);
    // Add mappings for some DebugInfo nodes that we don't want duplicated
    // even if they're distinct: pin the unit, type and file to themselves
    // in the metadata map.
    auto &MD = VMap.MD();
    MD[SP->getUnit()].reset(SP->getUnit());
    MD[SP->getType()].reset(SP->getType());
    MD[SP->getFile()].reset(SP->getFile());
    // If we're not cloning into the same module, no need to clone the
    // subprogram: map it to itself as well.
    if (!MustCloneSP)
      MD[SP].reset(SP);
  }

  // Duplicate the function-level metadata attachments (remapped through VMap).
  SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
  OldFunc->getAllMetadata(MDs);
  for (auto MD : MDs) {
    NewFunc->addMetadata(
        MD.first,
        *MapMetadata(MD.second, VMap,
                     ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
                     TypeMapper, Materializer));
  }

  // When we remap instructions, we want to avoid duplicating inlined
  // DISubprograms, so record all subprograms we find as we duplicate
  // instructions and then freeze them in the MD map.
  DebugInfoFinder DIFinder;

  // Loop over all of the basic blocks in the function, cloning them as
  // appropriate.  Note that we save BE this way in order to handle cloning of
  // recursive functions into themselves.
  //
  for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
       BI != BE; ++BI) {
    const BasicBlock &BB = *BI;

    // Create a new basic block and copy instructions into it!  Only collect
    // debug info when the function actually has a subprogram attached.
    BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo,
                                      SP ? &DIFinder : nullptr);

    // Add basic block mapping.
    VMap[&BB] = CBB;

    // It is only legal to clone a function if a block address within that
    // function is never referenced outside of the function.  Given that, we
    // want to map block addresses from the old function to block addresses in
    // the clone.  (This is different from the generic ValueMapper
    // implementation, which generates an invalid blockaddress when
    // cloning a function.)
    if (BB.hasAddressTaken()) {
      Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
                                              const_cast<BasicBlock*>(&BB));
      VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB);
    }

    // Note return instructions for the caller.
    if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
      Returns.push_back(RI);
  }

  // Freeze every inlined subprogram we encountered (other than this
  // function's own SP) so remapping below does not duplicate it.
  for (DISubprogram *ISP : DIFinder.subprograms()) {
    if (ISP != SP) {
      VMap.MD()[ISP].reset(ISP);
    }
  }

  // Loop over all of the instructions in the function, fixing up operand
  // references as we go.  This uses VMap to do all the hard work.  This must
  // run after all blocks are cloned so that forward references resolve.
  for (Function::iterator
           BB = cast<BasicBlock>(VMap[&OldFunc->front()])->getIterator(),
           BE = NewFunc->end();
       BB != BE; ++BB)
    // Loop over all instructions, fixing each one as we find it...
    for (Instruction &II : *BB)
      RemapInstruction(&II, VMap,
                       ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
                       TypeMapper, Materializer);
}