bool ConstantInsertExtractElementIndex::runOnBasicBlock(BasicBlock &BB) { bool Changed = false; if (!DL) DL = &getAnalysis<DataLayoutPass>().getDataLayout(); Instructions OutOfRangeConstantIndices; Instructions NonConstantVectorIndices; findNonConstantInsertExtractElements(BB, OutOfRangeConstantIndices, NonConstantVectorIndices); if (!OutOfRangeConstantIndices.empty()) { Changed = true; fixOutOfRangeConstantIndices(BB, OutOfRangeConstantIndices); } if (!NonConstantVectorIndices.empty()) { Changed = true; fixNonConstantVectorIndices(BB, NonConstantVectorIndices); } return Changed; }
void constructBlocks(Blocks &blocks, Instructions &instructions) { /* Create the first block containing the very first instruction in this script. * Then follow the complete code flow from this instruction onwards. */ assert(blocks.empty()); if (instructions.empty()) return; blocks.push_back(Block(instructions.front().address)); constructBlocks(blocks, blocks.back(), instructions.front()); }
void LowerEmAsyncify::transformAsyncFunction(Function &F, Instructions const& AsyncCalls) { assert(!AsyncCalls.empty()); // Pass 0 // collect all the return instructions from the original function // will use later std::vector<ReturnInst*> OrigReturns; for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { if (ReturnInst *RI = dyn_cast<ReturnInst>(&*I)) { OrigReturns.push_back(RI); } } // Pass 1 // Scan each async call and make the basic structure: // All these will be cloned into the callback functions // - allocate the async context before calling an async function // - check async right after calling an async function, save context & return if async, continue if not // - retrieve the async return value and free the async context if the called function turns out to be sync std::vector<AsyncCallEntry> AsyncCallEntries; AsyncCallEntries.reserve(AsyncCalls.size()); for (Instructions::const_iterator I = AsyncCalls.begin(), E = AsyncCalls.end(); I != E; ++I) { // prepare blocks Instruction *CurAsyncCall = *I; // The block containing the async call BasicBlock *CurBlock = CurAsyncCall->getParent(); // The block should run after the async call BasicBlock *AfterCallBlock = SplitBlock(CurBlock, CurAsyncCall->getNextNode()); // The block where we store the context and return BasicBlock *SaveAsyncCtxBlock = BasicBlock::Create(TheModule->getContext(), "SaveAsyncCtx", &F, AfterCallBlock); // return a dummy value at the end, to make the block valid new UnreachableInst(TheModule->getContext(), SaveAsyncCtxBlock); // allocate the context before making the call // we don't know the size yet, will fix it later // we cannot insert the instruction later because, // we need to make sure that all the instructions and blocks are fixed before we can generate DT and find context variables // In CallHandler.h `sp` will be put as the second parameter // such that we can take a note of the original sp CallInst *AllocAsyncCtxInst = CallInst::Create(AllocAsyncCtxFunction, Constant::getNullValue(I32), "AsyncCtx", CurAsyncCall); // Right after the call // check async and return if so // TODO: we can define truly async functions and partial async functions { // remove old terminator, which came from SplitBlock CurBlock->getTerminator()->eraseFromParent(); // go to SaveAsyncCtxBlock if the previous call is async // otherwise just continue to AfterCallBlock CallInst *CheckAsync = CallInst::Create(CheckAsyncFunction, "IsAsync", CurBlock); BranchInst::Create(SaveAsyncCtxBlock, AfterCallBlock, CheckAsync, CurBlock); } // take a note of this async call AsyncCallEntry CurAsyncCallEntry; CurAsyncCallEntry.AsyncCallInst = CurAsyncCall; CurAsyncCallEntry.AfterCallBlock = AfterCallBlock; CurAsyncCallEntry.AllocAsyncCtxInst = AllocAsyncCtxInst; CurAsyncCallEntry.SaveAsyncCtxBlock = SaveAsyncCtxBlock; // create an empty function for the callback, which will be constructed later CurAsyncCallEntry.CallbackFunc = Function::Create(CallbackFunctionType, F.getLinkage(), F.getName() + "__async_cb", TheModule); AsyncCallEntries.push_back(CurAsyncCallEntry); } // Pass 2 // analyze the context variables and construct SaveAsyncCtxBlock for each async call // also calculate the size of the context and allocate the async context accordingly for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) { AsyncCallEntry & CurEntry = *EI; // Collect everything to be saved FindContextVariables(CurEntry); // Pack the variables as a struct { // TODO: sort them from large memeber to small ones, in order to make the struct compact even when aligned SmallVector<Type*, 8> Types; Types.push_back(CallbackFunctionType->getPointerTo()); for (Values::iterator VI = CurEntry.ContextVariables.begin(), VE = CurEntry.ContextVariables.end(); VI != VE; ++VI) { Types.push_back((*VI)->getType()); } CurEntry.ContextStructType = StructType::get(TheModule->getContext(), Types); } // fix the size of allocation CurEntry.AllocAsyncCtxInst->setOperand(0, ConstantInt::get(I32, DL->getTypeStoreSize(CurEntry.ContextStructType))); // construct SaveAsyncCtxBlock { // fill in SaveAsyncCtxBlock // temporarily remove the terminator for convenience CurEntry.SaveAsyncCtxBlock->getTerminator()->eraseFromParent(); assert(CurEntry.SaveAsyncCtxBlock->empty()); Type *AsyncCtxAddrTy = CurEntry.ContextStructType->getPointerTo(); BitCastInst *AsyncCtxAddr = new BitCastInst(CurEntry.AllocAsyncCtxInst, AsyncCtxAddrTy, "AsyncCtxAddr", CurEntry.SaveAsyncCtxBlock); SmallVector<Value*, 2> Indices; // store the callback { Indices.push_back(ConstantInt::get(I32, 0)); Indices.push_back(ConstantInt::get(I32, 0)); GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(AsyncCtxAddrTy, AsyncCtxAddr, Indices, "", CurEntry.SaveAsyncCtxBlock); new StoreInst(CurEntry.CallbackFunc, AsyncVarAddr, CurEntry.SaveAsyncCtxBlock); } // store the context variables for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) { Indices.clear(); Indices.push_back(ConstantInt::get(I32, 0)); Indices.push_back(ConstantInt::get(I32, i + 1)); // the 0th element is the callback function GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(AsyncCtxAddrTy, AsyncCtxAddr, Indices, "", CurEntry.SaveAsyncCtxBlock); new StoreInst(CurEntry.ContextVariables[i], AsyncVarAddr, CurEntry.SaveAsyncCtxBlock); } // to exit the block, we want to return without unwinding the stack frame CallInst::Create(DoNotUnwindFunction, "", CurEntry.SaveAsyncCtxBlock); ReturnInst::Create(TheModule->getContext(), (F.getReturnType()->isVoidTy() ? 0 : Constant::getNullValue(F.getReturnType())), CurEntry.SaveAsyncCtxBlock); } } // Pass 3 // now all the SaveAsyncCtxBlock's have been constructed // we can clone F and construct callback functions // we could not construct the callbacks in Pass 2 because we need _all_ those SaveAsyncCtxBlock's appear in _each_ callback for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) { AsyncCallEntry & CurEntry = *EI; Function *CurCallbackFunc = CurEntry.CallbackFunc; ValueToValueMapTy VMap; // Add the entry block // load variables from the context // also update VMap for CloneFunction BasicBlock *EntryBlock = BasicBlock::Create(TheModule->getContext(), "AsyncCallbackEntry", CurCallbackFunc); std::vector<LoadInst *> LoadedAsyncVars; { Type *AsyncCtxAddrTy = CurEntry.ContextStructType->getPointerTo(); BitCastInst *AsyncCtxAddr = new BitCastInst(CurCallbackFunc->arg_begin(), AsyncCtxAddrTy, "AsyncCtx", EntryBlock); SmallVector<Value*, 2> Indices; for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) { Indices.clear(); Indices.push_back(ConstantInt::get(I32, 0)); Indices.push_back(ConstantInt::get(I32, i + 1)); // the 0th element of AsyncCtx is the callback function GetElementPtrInst *AsyncVarAddr = GetElementPtrInst::Create(AsyncCtxAddrTy, AsyncCtxAddr, Indices, "", EntryBlock); LoadedAsyncVars.push_back(new LoadInst(AsyncVarAddr, "", EntryBlock)); // we want the argument to be replaced by the loaded value if (isa<Argument>(CurEntry.ContextVariables[i])) VMap[CurEntry.ContextVariables[i]] = LoadedAsyncVars.back(); } } // we don't need any argument, just leave dummy entries there to cheat CloneFunctionInto for (Function::const_arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) { if (VMap.count(AI) == 0) VMap[AI] = Constant::getNullValue(AI->getType()); } // Clone the function { SmallVector<ReturnInst*, 8> Returns; CloneFunctionInto(CurCallbackFunc, &F, VMap, false, Returns); // return type of the callback functions is always void // need to fix the return type if (!F.getReturnType()->isVoidTy()) { // for those return instructions that are from the original function // it means we are 'truly' leaving this function // need to store the return value right before ruturn for (size_t i = 0; i < OrigReturns.size(); ++i) { ReturnInst *RI = cast<ReturnInst>(VMap[OrigReturns[i]]); // Need to store the return value into the global area CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", RI); BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, F.getReturnType()->getPointerTo(), "AsyncRetValAddr", RI); new StoreInst(RI->getOperand(0), RetValAddr, RI); } // we want to unwind the stack back to where it was before the original function as called // but we don't actually need to do this here // at this point it must be true that no callback is pended // so the scheduler will correct the stack pointer and pop the frame // here we just fix the return type for (size_t i = 0; i < Returns.size(); ++i) { ReplaceInstWithInst(Returns[i], ReturnInst::Create(TheModule->getContext())); } } } // the callback function does not have any return value // so clear all the attributes for return { AttributeSet Attrs = CurCallbackFunc->getAttributes(); CurCallbackFunc->setAttributes( Attrs.removeAttributes(TheModule->getContext(), AttributeSet::ReturnIndex, Attrs.getRetAttributes()) ); } // in the callback function, we never allocate a new async frame // instead we reuse the existing one for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) { Instruction *I = cast<Instruction>(VMap[EI->AllocAsyncCtxInst]); ReplaceInstWithInst(I, CallInst::Create(ReallocAsyncCtxFunction, I->getOperand(0), "ReallocAsyncCtx")); } // mapped entry point & async call BasicBlock *ResumeBlock = cast<BasicBlock>(VMap[CurEntry.AfterCallBlock]); Instruction *MappedAsyncCall = cast<Instruction>(VMap[CurEntry.AsyncCallInst]); // To save space, for each async call in the callback function, we just ignore the sync case, and leave it to the scheduler // TODO need an option for this { for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) { AsyncCallEntry & CurEntry = *EI; Instruction *MappedAsyncCallInst = cast<Instruction>(VMap[CurEntry.AsyncCallInst]); BasicBlock *MappedAsyncCallBlock = MappedAsyncCallInst->getParent(); BasicBlock *MappedAfterCallBlock = cast<BasicBlock>(VMap[CurEntry.AfterCallBlock]); // for the sync case of the call, go to NewBlock (instead of MappedAfterCallBlock) BasicBlock *NewBlock = BasicBlock::Create(TheModule->getContext(), "", CurCallbackFunc, MappedAfterCallBlock); MappedAsyncCallBlock->getTerminator()->setSuccessor(1, NewBlock); // store the return value if (!MappedAsyncCallInst->use_empty()) { CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", NewBlock); BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, MappedAsyncCallInst->getType()->getPointerTo(), "AsyncRetValAddr", NewBlock); new StoreInst(MappedAsyncCallInst, RetValAddr, NewBlock); } // tell the scheduler that we want to keep the current async stack frame CallInst::Create(DoNotUnwindAsyncFunction, "", NewBlock); // finally we go to the SaveAsyncCtxBlock, to register the callbac, save the local variables and leave BasicBlock *MappedSaveAsyncCtxBlock = cast<BasicBlock>(VMap[CurEntry.SaveAsyncCtxBlock]); BranchInst::Create(MappedSaveAsyncCtxBlock, NewBlock); } } std::vector<AllocaInst*> ToPromote; // applying loaded variables in the entry block { BasicBlockSet ReachableBlocks = FindReachableBlocksFrom(ResumeBlock); for (size_t i = 0; i < CurEntry.ContextVariables.size(); ++i) { Value *OrigVar = CurEntry.ContextVariables[i]; if (isa<Argument>(OrigVar)) continue; // already processed Value *CurVar = VMap[OrigVar]; assert(CurVar != MappedAsyncCall); if (Instruction *Inst = dyn_cast<Instruction>(CurVar)) { if (ReachableBlocks.count(Inst->getParent())) { // Inst could be either defined or loaded from the async context // Do the dirty works in memory // TODO: might need to check the safety first // TODO: can we create phi directly? AllocaInst *Addr = DemoteRegToStack(*Inst, false); new StoreInst(LoadedAsyncVars[i], Addr, EntryBlock); ToPromote.push_back(Addr); } else { // The parent block is not reachable, which means there is no confliction // it's safe to replace Inst with the loaded value assert(Inst != LoadedAsyncVars[i]); // this should only happen when OrigVar is an Argument Inst->replaceAllUsesWith(LoadedAsyncVars[i]); } } } } // resolve the return value of the previous async function // it could be the value just loaded from the global area // or directly returned by the function (in its sync case) if (!CurEntry.AsyncCallInst->use_empty()) { // load the async return value CallInst *RawRetValAddr = CallInst::Create(GetAsyncReturnValueAddrFunction, "", EntryBlock); BitCastInst *RetValAddr = new BitCastInst(RawRetValAddr, MappedAsyncCall->getType()->getPointerTo(), "AsyncRetValAddr", EntryBlock); LoadInst *RetVal = new LoadInst(RetValAddr, "AsyncRetVal", EntryBlock); AllocaInst *Addr = DemoteRegToStack(*MappedAsyncCall, false); new StoreInst(RetVal, Addr, EntryBlock); ToPromote.push_back(Addr); } // TODO remove unreachable blocks before creating phi // We go right to ResumeBlock from the EntryBlock BranchInst::Create(ResumeBlock, EntryBlock); /* * Creating phi's * Normal stack frames and async stack frames are interleaving with each other. * In a callback function, if we call an async function, we might need to realloc the async ctx. * at this point we don't want anything stored after the ctx, * such that we can free and extend the ctx by simply update STACKTOP. * Therefore we don't want any alloca's in callback functions. * */ if (!ToPromote.empty()) { DominatorTreeWrapperPass DTW; DTW.runOnFunction(*CurCallbackFunc); PromoteMemToReg(ToPromote, DTW.getDomTree()); } removeUnreachableBlocks(*CurCallbackFunc); } // Pass 4 // Here are modifications to the original function, which we won't want to be cloned into the callback functions for (std::vector<AsyncCallEntry>::iterator EI = AsyncCallEntries.begin(), EE = AsyncCallEntries.end(); EI != EE; ++EI) { AsyncCallEntry & CurEntry = *EI; // remove the frame if no async functinon has been called CallInst::Create(FreeAsyncCtxFunction, CurEntry.AllocAsyncCtxInst, "", CurEntry.AfterCallBlock->getFirstNonPHI()); } }
// Input: // - SExtInsts contains all the sext instructions that are used directly in // GetElementPtrInst, i.e., access to memory. // Algorithm: // - For each sext operation in SExtInsts: // Let var be the operand of sext. // while it is profitable (see shouldGetThrough), legal, and safe // (see canGetThrough) to move sext through var's definition: // * promote the type of var's definition. // * fold var into sext uses. // * move sext above var's definition. // * update sext operand to use the operand of var that should be sign // extended (by construction there is only one). // // E.g., // a = ... i32 c, 3 // b = sext i32 a to i64 <- is it legal/safe/profitable to get through 'a' // ... // = b // => Yes, update the code // b = sext i32 c to i64 // a = ... i64 b, 3 // ... // = a // Iterate on 'c'. bool AArch64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) { DEBUG(dbgs() << "*** Propagate Sign Extension ***\n"); bool LocalChange = false; SetOfInstructions ToRemove; ValueToInsts ValToSExtendedUses; while (!SExtInsts.empty()) { // Get through simple chain. Instruction *SExt = SExtInsts.pop_back_val(); DEBUG(dbgs() << "Consider:\n" << *SExt << '\n'); // If this SExt has already been merged continue. if (SExt->use_empty() && ToRemove.count(SExt)) { DEBUG(dbgs() << "No uses => marked as delete\n"); continue; } // Now try to get through the chain of definitions. while (auto *Inst = dyn_cast<Instruction>(SExt->getOperand(0))) { DEBUG(dbgs() << "Try to get through:\n" << *Inst << '\n'); if (!canGetThrough(Inst) || !shouldGetThrough(Inst)) { // We cannot get through something that is not an Instruction // or not safe to SExt. DEBUG(dbgs() << "Cannot get through\n"); break; } LocalChange = true; // If this is a sign extend, it becomes useless. if (isa<SExtInst>(Inst) || isa<TruncInst>(Inst)) { DEBUG(dbgs() << "SExt or trunc, mark it as to remove\n"); // We cannot use replaceAllUsesWith here because we may trigger some // assertion on the type as all involved sext operation may have not // been moved yet. while (!Inst->use_empty()) { Use &U = *Inst->use_begin(); Instruction *User = dyn_cast<Instruction>(U.getUser()); assert(User && "User of sext is not an Instruction!"); User->setOperand(U.getOperandNo(), SExt); } ToRemove.insert(Inst); SExt->setOperand(0, Inst->getOperand(0)); SExt->moveBefore(Inst); continue; } // Get through the Instruction: // 1. Update its type. // 2. Replace the uses of SExt by Inst. // 3. Sign extend each operand that needs to be sign extended. // Step #1. Inst->mutateType(SExt->getType()); // Step #2. SExt->replaceAllUsesWith(Inst); // Step #3. Instruction *SExtForOpnd = SExt; DEBUG(dbgs() << "Propagate SExt to operands\n"); for (int OpIdx = 0, EndOpIdx = Inst->getNumOperands(); OpIdx != EndOpIdx; ++OpIdx) { DEBUG(dbgs() << "Operand:\n" << *(Inst->getOperand(OpIdx)) << '\n'); if (Inst->getOperand(OpIdx)->getType() == SExt->getType() || !shouldSExtOperand(Inst, OpIdx)) { DEBUG(dbgs() << "No need to propagate\n"); continue; } // Check if we can statically sign extend the operand. Value *Opnd = Inst->getOperand(OpIdx); if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) { DEBUG(dbgs() << "Statically sign extend\n"); Inst->setOperand(OpIdx, ConstantInt::getSigned(SExt->getType(), Cst->getSExtValue())); continue; } // UndefValue are typed, so we have to statically sign extend them. if (isa<UndefValue>(Opnd)) { DEBUG(dbgs() << "Statically sign extend\n"); Inst->setOperand(OpIdx, UndefValue::get(SExt->getType())); continue; } // Otherwise we have to explicity sign extend it. assert(SExtForOpnd && "Only one operand should have been sign extended"); SExtForOpnd->setOperand(0, Opnd); DEBUG(dbgs() << "Move before:\n" << *Inst << "\nSign extend\n"); // Move the sign extension before the insertion point. SExtForOpnd->moveBefore(Inst); Inst->setOperand(OpIdx, SExtForOpnd); // If more sext are required, new instructions will have to be created. SExtForOpnd = nullptr; } if (SExtForOpnd == SExt) { DEBUG(dbgs() << "Sign extension is useless now\n"); ToRemove.insert(SExt); break; } } // If the use is already of the right type, connect its uses to its argument // and delete it. // This can happen for an Instruction all uses of which are sign extended. if (!ToRemove.count(SExt) && SExt->getType() == SExt->getOperand(0)->getType()) { DEBUG(dbgs() << "Sign extension is useless, attach its use to " "its argument\n"); SExt->replaceAllUsesWith(SExt->getOperand(0)); ToRemove.insert(SExt); } else ValToSExtendedUses[SExt->getOperand(0)].push_back(SExt); } if (EnableMerge) mergeSExts(ValToSExtendedUses, ToRemove); // Remove all instructions marked as ToRemove. for (Instruction *I: ToRemove) I->eraseFromParent(); return LocalChange; }