AllocaInst* Variables::changeLocal(Value* value, PointerType* newType) {

  AllocaInst *oldTarget = dyn_cast<AllocaInst>(value);
  PointerType *oldPointerType = dyn_cast<PointerType>(oldTarget->getType());
  PointerType *oldType = dyn_cast<PointerType>(oldPointerType->getElementType());
  AllocaInst *newTarget = NULL;

  errs() << "Changing the precision of pointer variable \"" << oldTarget->getName()
         << "\" from " << *oldType << " to " << *newType << ".\n";

  if (diffTypes(newType, oldType)) {
    newTarget = new AllocaInst(newType, getInt32(1), "", oldTarget);

    // We do not call getAlignment here because in this case a double requires
    // an alignment of 16. Investigate further.
    unsigned alignment;
    switch (newType->getElementType()->getTypeID()) {
    case Type::FloatTyID:
      alignment = 4;
      break;
    case Type::DoubleTyID:
      alignment = 8;
      break;
    case Type::X86_FP80TyID:
      alignment = 16;
      break;
    default:
      alignment = 0;
    }
    newTarget->setAlignment(alignment); // Depends on the type? 8 for float? 16 for double?
    newTarget->takeName(oldTarget);

    // Iterate through the uses of the old AllocaInst.
    vector<Instruction*> erase;
    Value::use_iterator it = oldTarget->use_begin();
#ifdef DEBUG
    errs() << "\nOld target: ";
    oldTarget->dump();
#endif
    for (; it != oldTarget->use_end(); it++) {
#ifdef DEBUG
      errs() << "\nA use: ";
      it->dump();
      errs() << "\n===============================\n";
      errs() << "\nTransforming use\n";
#endif
      bool is_erased = Transformer::transform(it, newTarget, oldTarget,
                                              newType, oldType, alignment);
      if (!is_erased) {
        erase.push_back(dyn_cast<Instruction>(*it));
      }
#ifdef DEBUG
      errs() << "\nDone transforming use\n";
#endif
    }

    // Erase the uses of the old instructions.
    for (unsigned int i = 0; i < erase.size(); i++) {
      erase[i]->eraseFromParent();
    }

    // Erase the old instruction.
    //oldTarget->eraseFromParent();
#ifdef DEBUG
    errs() << "DONE ALL TRANSFORMATION FOR POINTER\n";
#endif
  } else {
    errs() << "\tNo changes required.\n";
  }

  return newTarget;
}
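// Illustrative effect of changeLocal (hypothetical IR; assumes the helper
// diffTypes reports a precision change and Transformer::transform rewrites
// each use of the old alloca to match):
//
//   before:  %p = alloca float*, align 4
//   after:   %p = alloca double*, align 8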
Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
  // Ensure that the alloca array size argument has type intptr_t, so that
  // any casting is exposed early.
  if (DL) {
    Type *IntPtrTy = DL->getIntPtrType(AI.getType());
    if (AI.getArraySize()->getType() != IntPtrTy) {
      Value *V = Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
      AI.setOperand(0, V);
      return &AI;
    }
  }

  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
  if (AI.isArrayAllocation()) {  // Check C != 1
    if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
      Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
      AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
      New->setAlignment(AI.getAlignment());

      // Scan to the end of the allocation instructions, to skip over a block of
      // allocas if possible...also skip interleaved debug info
      //
      BasicBlock::iterator It = New;
      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;

      // Now that I is pointing to the first non-allocation-inst in the block,
      // insert our getelementptr instruction...
      //
      Type *IdxTy = DL
                  ? DL->getIntPtrType(AI.getType())
                  : Type::getInt64Ty(AI.getContext());
      Value *NullIdx = Constant::getNullValue(IdxTy);
      Value *Idx[2] = { NullIdx, NullIdx };
      Instruction *GEP =
        GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
      InsertNewInstBefore(GEP, *It);

      // Now make everything use the getelementptr instead of the original
      // allocation.
      return ReplaceInstUsesWith(AI, GEP);
    } else if (isa<UndefValue>(AI.getArraySize())) {
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
    }
  }

  if (DL && AI.getAllocatedType()->isSized()) {
    // If the alignment is 0 (unspecified), assign it the preferred alignment.
    if (AI.getAlignment() == 0)
      AI.setAlignment(DL->getPrefTypeAlignment(AI.getAllocatedType()));

    // Move all alloca's of zero byte objects to the entry block and merge them
    // together.  Note that we only do this for alloca's, because malloc should
    // allocate and return a unique pointer, even for a zero byte allocation.
    if (DL->getTypeAllocSize(AI.getAllocatedType()) == 0) {
      // For a zero sized alloca there is no point in doing an array allocation.
      // This is helpful if the array size is a complicated expression not used
      // elsewhere.
      if (AI.isArrayAllocation()) {
        AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
        return &AI;
      }

      // Get the first instruction in the entry block.
      BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
      Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
      if (FirstInst != &AI) {
        // If the entry block doesn't start with a zero-size alloca then move
        // this one to the start of the entry block.  There is no problem with
        // dominance as the array size was forced to a constant earlier already.
        AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
        if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
            DL->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
          AI.moveBefore(FirstInst);
          return &AI;
        }

        // If the alignment of the entry block alloca is 0 (unspecified),
        // assign it the preferred alignment.
        if (EntryAI->getAlignment() == 0)
          EntryAI->setAlignment(
            DL->getPrefTypeAlignment(EntryAI->getAllocatedType()));
        // Replace this zero-sized alloca with the one at the start of the entry
        // block after ensuring that the address will be aligned enough for both
        // types.
        unsigned MaxAlign = std::max(EntryAI->getAlignment(),
                                     AI.getAlignment());
        EntryAI->setAlignment(MaxAlign);
        if (AI.getType() != EntryAI->getType())
          return new BitCastInst(EntryAI, AI.getType());
        return ReplaceInstUsesWith(AI, EntryAI);
      }
    }
  }

  if (AI.getAlignment()) {
    // Check to see if this allocation is only modified by a memcpy/memmove from
    // a constant global whose alignment is equal to or exceeds that of the
    // allocation.  If this is the case, we can change all users to use
    // the constant global instead.  This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
    // is only subsequently read.
    SmallVector<Instruction *, 4> ToDelete;
    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
      unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(),
                                                        AI.getAlignment(), DL);
      if (AI.getAlignment() <= SourceAlign) {
        DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
        DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
        for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
          EraseInstFromFunction(*ToDelete[i]);
        Constant *TheSrc = cast<Constant>(Copy->getSource());
        Constant *Cast
          = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
        Instruction *NewI = ReplaceInstUsesWith(AI, Cast);
        EraseInstFromFunction(*Copy);
        ++NumGlobalCopies;
        return NewI;
      }
    }
  }

  // At last, use the generic allocation site handler to aggressively remove
  // unused allocas.
  return visitAllocSite(AI);
}
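// Illustrative IR for the array-alloca canonicalization above (hypothetical
// example; names are what the combiner would roughly produce):
//
//   %buf = alloca i32, i32 4, align 4
//     ==>
//   %buf1 = alloca [4 x i32], align 4
//   %buf1.sub = getelementptr inbounds [4 x i32]* %buf1, i64 0, i64 0
//
// All users of %buf are then pointed at %buf1.sub instead.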
// FIXME: Should try to pick the most likely to be profitable allocas first.
void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
  // Array allocations are probably not worth handling, since an allocation of
  // the array type is the canonical form.
  if (!I.isStaticAlloca() || I.isArrayAllocation())
    return;

  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector
  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I)) {
    DEBUG(dbgs() << " alloca promoted to vector.\n");
    return;
  }

  const Function &ContainingFunction = *I.getParent()->getParent();

  // Don't promote the alloca to LDS for shader calling conventions as the work
  // item ID intrinsics are not supported for these calling conventions.
  // Furthermore not all LDS is available for some of the stages.
  if (AMDGPU::isShader(ContainingFunction.getCallingConv()))
    return;

  // FIXME: We should also try to get this value from the reqd_work_group_size
  // function attribute if it is available.
  unsigned WorkGroupSize = AMDGPU::getMaximumWorkGroupSize(ContainingFunction);

  const DataLayout &DL = Mod->getDataLayout();

  unsigned Align = I.getAlignment();
  if (Align == 0)
    Align = DL.getABITypeAlignment(I.getAllocatedType());

  // FIXME: This computed padding is likely wrong since it depends on inverse
  // usage order.
  //
  // FIXME: It is also possible that if we're allowed to use all of the memory
  // we could end up using more than the maximum due to alignment padding.

  uint32_t NewSize = alignTo(CurrentLocalMemUsage, Align);
  uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy);
  NewSize += AllocSize;

  if (NewSize > LocalMemLimit) {
    DEBUG(dbgs() << "  " << AllocSize
          << " bytes of local memory not available to promote\n");
    return;
  }

  CurrentLocalMemUsage = NewSize;

  std::vector<Value*> WorkList;

  if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");

  Function *F = I.getParent()->getParent();

  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage,
      UndefValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(),
      nullptr,
      GlobalVariable::NotThreadLocal,
      AMDGPUAS::LOCAL_ADDRESS);
  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  GV->setAlignment(I.getAlignment());

  Value *TCntY, *TCntZ;

  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
  Value *TIdX = getWorkitemID(Builder, 0);
  Value *TIdY = getWorkitemID(Builder, 1);
  Value *TIdZ = getWorkitemID(Builder, 2);

  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  Value *Indices[] = {
    Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
    TID
  };

  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  for (Value *V : WorkList) {
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
        Value *Src0 = CI->getOperand(0);
        Type *EltTy = Src0->getType()->getPointerElementType();
        PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);

        if (isa<ConstantPointerNull>(CI->getOperand(0)))
          CI->setOperand(0, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(CI->getOperand(1)))
          CI->setOperand(1, ConstantPointerNull::get(NewTy));

        continue;
      }

      // The operand's value should be corrected on its own.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);

      // Adjust the types of any constant operands.
      if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
        if (isa<ConstantPointerNull>(SI->getOperand(1)))
          SI->setOperand(1, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(SI->getOperand(2)))
          SI->setOperand(2, ConstantPointerNull::get(NewTy));
      } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
        for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
          if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
            Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy));
        }
      }

      continue;
    }

    IntrinsicInst *Intr = cast<IntrinsicInst>(Call);
    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy: {
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
                           MemCpy->getLength(), MemCpy->getAlignment(),
                           MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
      Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getRawSource(),
                            MemMove->getLength(), MemMove->getAlignment(),
                            MemMove->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getAlignment(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::invariant_group_barrier:
      Intr->eraseFromParent();
      // FIXME: I think the invariant marker should still theoretically apply,
      // but the intrinsics need to be changed to accept pointers with any
      // address space.
      continue;
    case Intrinsic::objectsize: {
      Value *Src = Intr->getOperand(0);
      Type *SrcTy = Src->getType()->getPointerElementType();
      Function *ObjectSize = Intrinsic::getDeclaration(Mod,
        Intrinsic::objectsize,
        { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) }
      );

      CallInst *NewCall
        = Builder.CreateCall(ObjectSize, { Src, Intr->getOperand(1) });
      Intr->replaceAllUsesWith(NewCall);
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->dump();
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
}
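// The flattened workitem id computed above is, in math form (TCnt* are the
// local sizes from getLocalSizeYZ, TId* the workitem ids):
//
//   TID = TIdX * (TCntY * TCntZ) + TIdY * TCntZ + TIdZ
//
// which indexes this workitem's private slice of the
// [WorkGroupSize x AllocaTy] LDS array that replaces the alloca.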
// InlineFunction - This function inlines the called function into the basic
// block of the caller.  This returns false if it is not possible to inline
// this call.  The program is still in a well defined state if this occurs
// though.
//
// Note that this only does one level of inlining.  For example, if the
// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
// exists in the instruction stream.  Similarly this will inline a recursive
// function by one level.
//
bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
  Instruction *TheCall = CS.getInstruction();
  LLVMContext &Context = TheCall->getContext();
  assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
         "Instruction not in function!");

  // If IFI has any state in it, zap it before we fill it in.
  IFI.reset();

  const Function *CalledFunc = CS.getCalledFunction();
  if (CalledFunc == 0 ||            // Can't inline external function or indirect
      CalledFunc->isDeclaration() || // call, or call to a vararg function!
      CalledFunc->getFunctionType()->isVarArg()) return false;

  // If the call to the callee is not a tail call, we must clear the 'tail'
  // flags on any calls that we inline.
  bool MustClearTailCallFlags =
    !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());

  // If the call to the callee cannot throw, set the 'nounwind' flag on any
  // calls that we inline.
  bool MarkNoUnwind = CS.doesNotThrow();

  BasicBlock *OrigBB = TheCall->getParent();
  Function *Caller = OrigBB->getParent();

  // GC poses two hazards to inlining, which only occur when the callee has GC:
  //  1. If the caller has no GC, then the callee's GC must be propagated to
  //     the caller.
  //  2. If the caller has a differing GC, it is invalid to inline.
  if (CalledFunc->hasGC()) {
    if (!Caller->hasGC())
      Caller->setGC(CalledFunc->getGC());
    else if (CalledFunc->getGC() != Caller->getGC())
      return false;
  }

  // Get an iterator to the last basic block in the function, which will have
  // the new function inlined after it.
  //
  Function::iterator LastBlock = &Caller->back();

  // Make sure to capture all of the return instructions from the cloned
  // function.
  SmallVector<ReturnInst*, 8> Returns;
  ClonedCodeInfo InlinedFunctionInfo;
  Function::iterator FirstNewBlock;

  { // Scope to destroy VMap after cloning.
    ValueToValueMapTy VMap;

    assert(CalledFunc->arg_size() == CS.arg_size() &&
           "No varargs calls can be inlined!");

    // Calculate the vector of arguments to pass into the function cloner,
    // which matches up the formal to the actual argument values.
    CallSite::arg_iterator AI = CS.arg_begin();
    unsigned ArgNo = 0;
    for (Function::const_arg_iterator I = CalledFunc->arg_begin(),
         E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {
      Value *ActualArg = *AI;

      // When byval arguments are actually inlined, we need to make the copy
      // implied by them explicit.  However, we don't do this if the callee is
      // readonly or readnone, because the copy would be unneeded: the callee
      // doesn't modify the struct.
      if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal)) {
        ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
                                        CalledFunc->getParamAlignment(ArgNo+1));

        // Calls that we inline may use the new alloca, so we need to clear
        // their 'tail' flags if HandleByValArgument introduced a new alloca
        // and the callee has calls.
        MustClearTailCallFlags |= ActualArg != *AI;
      }

      VMap[I] = ActualArg;
    }

    // We want the inliner to prune the code as it copies.  We would LOVE to
    // have no dead or constant instructions leftover after inlining occurs
    // (which can happen, e.g., because an argument was constant), but we'll be
    // happy with whatever the cloner can do.
    CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
                              /*ModuleLevelChanges=*/false, Returns, ".i",
                              &InlinedFunctionInfo, IFI.TD, TheCall);

    // Remember the first block that is newly cloned over.
    FirstNewBlock = LastBlock; ++FirstNewBlock;

    // Update the callgraph if requested.
    if (IFI.CG)
      UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);

    // Update inlined instructions' line number information.
    fixupLineNumbers(Caller, FirstNewBlock, TheCall);
  }

  // If there are any alloca instructions in the block that used to be the
  // entry block for the callee, move them to the entry block of the caller.
  // First calculate which instruction they should be inserted before.  We
  // insert the instructions at the end of the current alloca list.
  //
  {
    BasicBlock::iterator InsertPoint = Caller->begin()->begin();
    for (BasicBlock::iterator I = FirstNewBlock->begin(),
         E = FirstNewBlock->end(); I != E; ) {
      AllocaInst *AI = dyn_cast<AllocaInst>(I++);
      if (AI == 0) continue;

      // If the alloca is now dead, remove it.  This often occurs due to code
      // specialization.
      if (AI->use_empty()) {
        AI->eraseFromParent();
        continue;
      }

      if (!isa<Constant>(AI->getArraySize()))
        continue;

      // Keep track of the static allocas that we inline into the caller.
      IFI.StaticAllocas.push_back(AI);

      // Scan for the block of allocas that we can move over, and move them
      // all at once.
      while (isa<AllocaInst>(I) &&
             isa<Constant>(cast<AllocaInst>(I)->getArraySize())) {
        IFI.StaticAllocas.push_back(cast<AllocaInst>(I));
        ++I;
      }

      // Transfer all of the allocas over in a block.  Using splice means
      // that the instructions aren't removed from the symbol table, then
      // reinserted.
      Caller->getEntryBlock().getInstList().splice(InsertPoint,
                                                   FirstNewBlock->getInstList(),
                                                   AI, I);
    }
  }

  // Leave lifetime markers for the static alloca's, scoping them to the
  // function we just inlined.
  if (!IFI.StaticAllocas.empty()) {
    IRBuilder<> builder(FirstNewBlock->begin());
    for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
      AllocaInst *AI = IFI.StaticAllocas[ai];

      // If the alloca is already scoped to something smaller than the whole
      // function then there's no need to add redundant, less accurate markers.
      if (hasLifetimeMarkers(AI))
        continue;

      builder.CreateLifetimeStart(AI);
      for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
        IRBuilder<> builder(Returns[ri]);
        builder.CreateLifetimeEnd(AI);
      }
    }
  }

  // If the inlined code contained dynamic alloca instructions, wrap the
  // inlined code with llvm.stacksave/llvm.stackrestore intrinsics.
  if (InlinedFunctionInfo.ContainsDynamicAllocas) {
    Module *M = Caller->getParent();
    // Get the two intrinsics we care about.
    Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave);
    Function *StackRestore
      = Intrinsic::getDeclaration(M, Intrinsic::stackrestore);

    // Insert the llvm.stacksave.
    CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
      .CreateCall(StackSave, "savedstack");

    // Insert a call to llvm.stackrestore before any return instructions in the
    // inlined function.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
    }

    // Count the number of StackRestore calls we insert.
    unsigned NumStackRestores = Returns.size();

    // If we are inlining an invoke instruction, insert restores before each
    // unwind.  These unwinds will be rewritten into branches later.
    if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) {
      for (Function::iterator BB = FirstNewBlock, E = Caller->end();
           BB != E; ++BB)
        if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
          IRBuilder<>(UI).CreateCall(StackRestore, SavedPtr);
          ++NumStackRestores;
        }
    }
  }

  // If we are inlining a tail call instruction through a call site that isn't
  // marked 'tail', we must remove the tail marker for any calls in the inlined
  // code.  Also, calls inlined through a 'nounwind' call site should be marked
  // 'nounwind'.
  if (InlinedFunctionInfo.ContainsCalls &&
      (MustClearTailCallFlags || MarkNoUnwind)) {
    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
         BB != E; ++BB)
      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
        if (CallInst *CI = dyn_cast<CallInst>(I)) {
          if (MustClearTailCallFlags)
            CI->setTailCall(false);
          if (MarkNoUnwind)
            CI->setDoesNotThrow();
        }
  }

  // If we are inlining through a 'nounwind' call site then any inlined
  // 'unwind' instructions are unreachable.
  if (InlinedFunctionInfo.ContainsUnwinds && MarkNoUnwind)
    for (Function::iterator BB = FirstNewBlock, E = Caller->end();
         BB != E; ++BB) {
      TerminatorInst *Term = BB->getTerminator();
      if (isa<UnwindInst>(Term)) {
        new UnreachableInst(Context, Term);
        BB->getInstList().erase(Term);
      }
    }

  // If we are inlining for an invoke instruction, we must make sure to rewrite
  // any inlined 'unwind' instructions into branches to the invoke exception
  // destination, and call instructions into invoke instructions.
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
    HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);

  // If we cloned in _exactly one_ basic block, and if that block ends in a
  // return instruction, we splice the body of the inlined callee directly into
  // the calling basic block.
  if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {
    // Move all of the instructions right before the call.
    OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(),
                                 FirstNewBlock->begin(),
                                 FirstNewBlock->end());
    // Remove the cloned basic block.
    Caller->getBasicBlockList().pop_back();

    // If the call site was an invoke instruction, add a branch to the normal
    // destination.
    if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
      BranchInst::Create(II->getNormalDest(), TheCall);

    // If the return instruction returned a value, replace uses of the call
    // with uses of the returned value.
    if (!TheCall->use_empty()) {
      ReturnInst *R = Returns[0];
      if (TheCall == R->getReturnValue())
        TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
      else
        TheCall->replaceAllUsesWith(R->getReturnValue());
    }
    // Since we are now done with the Call/Invoke, we can delete it.
    TheCall->eraseFromParent();

    // Since we are now done with the return instruction, delete it also.
    Returns[0]->eraseFromParent();

    // We are now done with the inlining.
    return true;
  }

  // Otherwise, we have the normal case, of more than one block to inline or
  // multiple return sites.

  // We want to clone the entire callee function into the hole between the
  // "starter" and "ender" blocks.  How we accomplish this depends on whether
  // this is an invoke instruction or a call instruction.
  BasicBlock *AfterCallBB;
  if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
    // Add an unconditional branch to make this look like the CallInst case...
    BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), TheCall);

    // Split the basic block.  This guarantees that no PHI nodes will have to
    // be updated due to new incoming edges, and makes the invoke case more
    // symmetric to the call case.
    AfterCallBB = OrigBB->splitBasicBlock(NewBr,
                                          CalledFunc->getName()+".exit");
  } else {  // It's a call
    // If this is a call instruction, we need to split the basic block that
    // the call lives in.
    //
    AfterCallBB = OrigBB->splitBasicBlock(TheCall,
                                          CalledFunc->getName()+".exit");
  }

  // Change the branch that used to go to AfterCallBB to branch to the first
  // basic block of the inlined function.
  //
  TerminatorInst *Br = OrigBB->getTerminator();
  assert(Br && Br->getOpcode() == Instruction::Br &&
         "splitBasicBlock broken!");
  Br->setOperand(0, FirstNewBlock);

  // Now that the function is correct, make it a little bit nicer.  In
  // particular, move the basic blocks inserted from the end of the function
  // into the space made by splitting the source basic block.
  Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(),
                                     FirstNewBlock, Caller->end());

  // Handle all of the return instructions that we just cloned in, and
  // eliminate any users of the original call/invoke instruction.
  Type *RTy = CalledFunc->getReturnType();

  PHINode *PHI = 0;
  if (Returns.size() > 1) {
    // The PHI node should go at the front of the new basic block to merge all
    // possible incoming values.
    if (!TheCall->use_empty()) {
      PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(),
                            AfterCallBB->begin());
      // Anything that used the result of the function call should now use the
      // PHI node as their operand.
      TheCall->replaceAllUsesWith(PHI);
    }

    // Loop over all of the return instructions adding entries to the PHI node
    // as appropriate.
    if (PHI) {
      for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
        ReturnInst *RI = Returns[i];
        assert(RI->getReturnValue()->getType() == PHI->getType() &&
               "Ret value not consistent in function!");
        PHI->addIncoming(RI->getReturnValue(), RI->getParent());
      }
    }

    // Add a branch to the merge points and remove return instructions.
    for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
      ReturnInst *RI = Returns[i];
      BranchInst::Create(AfterCallBB, RI);
      RI->eraseFromParent();
    }
  } else if (!Returns.empty()) {
    // Otherwise, if there is exactly one return value, just replace anything
    // using the return value of the call with the computed value.
    if (!TheCall->use_empty()) {
      if (TheCall == Returns[0]->getReturnValue())
        TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
      else
        TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
    }

    // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
    BasicBlock *ReturnBB = Returns[0]->getParent();
    ReturnBB->replaceAllUsesWith(AfterCallBB);

    // Splice the code from the return block into the block that it will return
    // to, which contains the code that was after the call.
    AfterCallBB->getInstList().splice(AfterCallBB->begin(),
                                      ReturnBB->getInstList());

    // Delete the return instruction now and empty ReturnBB now.
    Returns[0]->eraseFromParent();
    ReturnBB->eraseFromParent();
  } else if (!TheCall->use_empty()) {
    // No returns, but something is using the return value of the call.  Just
    // nuke the result.
    TheCall->replaceAllUsesWith(UndefValue::get(TheCall->getType()));
  }

  // Since we are now done with the Call/Invoke, we can delete it.
  TheCall->eraseFromParent();

  // We should always be able to fold the entry block of the function into the
  // single predecessor of the block...
  assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
  BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);

  // Splice the code entry block into calling block, right before the
  // unconditional branch.
  CalleeEntry->replaceAllUsesWith(OrigBB);  // Update PHI nodes
  OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());

  // Remove the unconditional branch.
  OrigBB->getInstList().erase(Br);

  // Now we can remove the CalleeEntry block, which is now empty.
  Caller->getBasicBlockList().erase(CalleeEntry);

  // If we inserted a phi node, check to see if it has a single value (e.g. all
  // the entries are the same or undef).  If so, remove the PHI so it doesn't
  // block other optimizations.
  if (PHI)
    if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
      PHI->replaceAllUsesWith(V);
      PHI->eraseFromParent();
    }

  return true;
}
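// Minimal caller-side sketch (hypothetical driver; assumes a call graph CG
// and target data TD are available, both of which may be null in this era's
// InlineFunctionInfo):
//
//   InlineFunctionInfo IFI(CG, TD);
//   CallSite CS(SomeCallInst);
//   if (InlineFunction(CS, IFI)) {
//     // SomeCallInst has been deleted; IFI.StaticAllocas now lists the
//     // allocas that were moved into the caller's entry block.
//   }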
Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
  if (auto *I = simplifyAllocaArraySize(*this, AI))
    return I;

  if (AI.getAllocatedType()->isSized()) {
    // If the alignment is 0 (unspecified), assign it the preferred alignment.
    if (AI.getAlignment() == 0)
      AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));

    // Move all alloca's of zero byte objects to the entry block and merge them
    // together.  Note that we only do this for alloca's, because malloc should
    // allocate and return a unique pointer, even for a zero byte allocation.
    if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
      // For a zero sized alloca there is no point in doing an array allocation.
      // This is helpful if the array size is a complicated expression not used
      // elsewhere.
      if (AI.isArrayAllocation()) {
        AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
        return &AI;
      }

      // Get the first instruction in the entry block.
      BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
      Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
      if (FirstInst != &AI) {
        // If the entry block doesn't start with a zero-size alloca then move
        // this one to the start of the entry block.  There is no problem with
        // dominance as the array size was forced to a constant earlier already.
        AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
        if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
            DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
          AI.moveBefore(FirstInst);
          return &AI;
        }

        // If the alignment of the entry block alloca is 0 (unspecified),
        // assign it the preferred alignment.
        if (EntryAI->getAlignment() == 0)
          EntryAI->setAlignment(
              DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
        // Replace this zero-sized alloca with the one at the start of the entry
        // block after ensuring that the address will be aligned enough for both
        // types.
        unsigned MaxAlign = std::max(EntryAI->getAlignment(),
                                     AI.getAlignment());
        EntryAI->setAlignment(MaxAlign);
        if (AI.getType() != EntryAI->getType())
          return new BitCastInst(EntryAI, AI.getType());
        return replaceInstUsesWith(AI, EntryAI);
      }
    }
  }

  if (AI.getAlignment()) {
    // Check to see if this allocation is only modified by a memcpy/memmove from
    // a constant global whose alignment is equal to or exceeds that of the
    // allocation.  If this is the case, we can change all users to use
    // the constant global instead.  This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
    // is only subsequently read.
    SmallVector<Instruction *, 4> ToDelete;
    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
      unsigned SourceAlign = getOrEnforceKnownAlignment(
          Copy->getSource(), AI.getAlignment(), DL, &AI, AC, DT);
      if (AI.getAlignment() <= SourceAlign) {
        DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
        DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
        for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
          eraseInstFromFunction(*ToDelete[i]);
        Constant *TheSrc = cast<Constant>(Copy->getSource());
        Constant *Cast
          = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType());
        Instruction *NewI = replaceInstUsesWith(AI, Cast);
        eraseInstFromFunction(*Copy);
        ++NumGlobalCopies;
        return NewI;
      }
    }
  }

  // At last, use the generic allocation site handler to aggressively remove
  // unused allocas.
  return visitAllocSite(AI);
}
/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
                                     Value *cpyDest, Value *cpySrc,
                                     uint64_t cpyLen, CallInst *C) {
  // The general transformation to keep in mind is
  //
  //   call @func(..., src, ...)
  //   memcpy(dest, src, ...)
  //
  // ->
  //
  //   memcpy(dest, src, ...)
  //   call @func(..., dest, ...)
  //
  // Since moving the memcpy is technically awkward, we additionally check that
  // src only holds uninitialized values at the moment of the call, meaning that
  // the memcpy can be discarded rather than moved.

  // Deliberately get the source and destination with bitcasts stripped away,
  // because we'll need to do type comparisons based on the underlying type.
  CallSite CS(C);

  // Require that src be an alloca.  This simplifies the reasoning considerably.
  AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
  if (!srcAlloca)
    return false;

  // Check that all of src is copied to dest.
  if (TD == 0) return false;

  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
  if (!srcArraySize)
    return false;

  uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) *
    srcArraySize->getZExtValue();

  if (cpyLen < srcSize)
    return false;

  // Check that accessing the first srcSize bytes of dest will not cause a
  // trap.  Otherwise the transform is invalid since it might cause a trap
  // to occur earlier than it otherwise would.
  if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
    // The destination is an alloca.  Check it is larger than srcSize.
    ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
    if (!destArraySize)
      return false;

    uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) *
      destArraySize->getZExtValue();

    if (destSize < srcSize)
      return false;
  } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
    // If the destination is an sret parameter then only accesses that are
    // outside of the returned struct type can trap.
    if (!A->hasStructRetAttr())
      return false;

    const Type *StructTy = cast<PointerType>(A->getType())->getElementType();
    uint64_t destSize = TD->getTypeAllocSize(StructTy);

    if (destSize < srcSize)
      return false;
  } else {
    return false;
  }

  // Check that src is not accessed except via the call and the memcpy.  This
  // guarantees that it holds only undefined values when passed in (so the final
  // memcpy can be dropped), that it is not read or written between the call and
  // the memcpy, and that writing beyond the end of it is undefined.
  SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(),
                                   srcAlloca->use_end());
  while (!srcUseList.empty()) {
    User *UI = srcUseList.pop_back_val();

    if (isa<BitCastInst>(UI)) {
      for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
           I != E; ++I)
        srcUseList.push_back(*I);
    } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) {
      if (G->hasAllZeroIndices())
        for (User::use_iterator I = UI->use_begin(), E = UI->use_end();
             I != E; ++I)
          srcUseList.push_back(*I);
      else
        return false;
    } else if (UI != C && UI != cpy) {
      return false;
    }
  }

  // Since we're changing the parameter to the callsite, we need to make sure
  // that what would be the new parameter dominates the callsite.
  DominatorTree &DT = getAnalysis<DominatorTree>();
  if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
    if (!DT.dominates(cpyDestInst, C))
      return false;

  // In addition to knowing that the call does not access src in some
  // unexpected manner, for example via a global, which we deduce from
  // the use analysis, we also need to know that it does not sneakily
  // access dest.  We rely on AA to figure this out for us.
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  if (AA.getModRefInfo(C, cpyDest, srcSize) != AliasAnalysis::NoModRef)
    return false;

  // All the checks have passed, so do the transformation.
  bool changedArgument = false;
  for (unsigned i = 0; i < CS.arg_size(); ++i)
    if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
      if (cpySrc->getType() != cpyDest->getType())
        cpyDest = CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
                                              cpyDest->getName(), C);
      changedArgument = true;
      if (CS.getArgument(i)->getType() == cpyDest->getType())
        CS.setArgument(i, cpyDest);
      else
        CS.setArgument(i, CastInst::CreatePointerCast(cpyDest,
                          CS.getArgument(i)->getType(), cpyDest->getName(), C));
    }

  if (!changedArgument)
    return false;

  // Drop any cached information about the call, because we may have changed
  // its dependence information by changing its parameter.
  MD->removeInstruction(C);

  // Remove the memcpy.
  MD->removeInstruction(cpy);
  ++NumMemCpyInstr;

  return true;
}
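// Illustrative IR for the call slot optimization (hypothetical example):
//
//   %tmp = alloca %T
//   call void @produce(%T* sret %tmp)
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst8, i8* %tmp8, i64 N, ...)
//     ==>
//   call void @produce(%T* sret %dst)
//
// i.e. the callee writes its result straight into the memcpy destination,
// and the memcpy (plus the temporary) becomes dead.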
/// runOnFunction - Insert code to maintain the shadow stack.
bool ShadowStackGC::performCustomLowering(Function &F) {
  LLVMContext &Context = F.getContext();

  // Find calls to llvm.gcroot.
  CollectRoots(F);

  // If there are no roots in this function, then there is no need to add a
  // stack map entry for it.
  if (Roots.empty())
    return false;

  // Build the constant map and figure the type of the shadow stack entry.
  Value *FrameMap = GetFrameMap(F);
  Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);

  // Build the shadow stack entry at the very start of the function.
  BasicBlock::iterator IP = F.getEntryBlock().begin();
  IRBuilder<> AtEntry(IP->getParent(), IP);

  Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0,
                                                 "gc_frame");

  while (isa<AllocaInst>(IP)) ++IP;
  AtEntry.SetInsertPoint(IP->getParent(), IP);

  // Initialize the map pointer and load the current head of the shadow stack.
  Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
  Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry,
                                       0, 1, "gc_frame.map");
  AtEntry.CreateStore(FrameMap, EntryMapPtr);

  // After all the allocas...
  for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
    // For each root, find the corresponding slot in the aggregate...
    Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root");

    // And use it in lieu of the alloca.
    AllocaInst *OriginalAlloca = Roots[I].second;
    SlotPtr->takeName(OriginalAlloca);
    OriginalAlloca->replaceAllUsesWith(SlotPtr);
  }

  // Move past the original stores inserted by GCStrategy::InitRoots.  This
  // isn't really necessary (the collector would never see the intermediate
  // state at runtime), but it's nicer not to push the half-initialized entry
  // onto the shadow stack.
  while (isa<StoreInst>(IP)) ++IP;
  AtEntry.SetInsertPoint(IP->getParent(), IP);

  // Push the entry onto the shadow stack.
  Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, StackEntry,
                                        0, 0, "gc_frame.next");
  Instruction *NewHeadVal = CreateGEP(Context, AtEntry, StackEntry,
                                      0, "gc_newhead");
  AtEntry.CreateStore(CurrentHead, EntryNextPtr);
  AtEntry.CreateStore(NewHeadVal, Head);

  // For each instruction that escapes...
  EscapeEnumerator EE(F, "gc_cleanup");
  while (IRBuilder<> *AtExit = EE.Next()) {
    // Pop the entry from the shadow stack.  Don't reuse CurrentHead from
    // AtEntry, since that would make the value live for the entire function.
    Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, StackEntry,
                                           0, 0, "gc_frame.next");
    Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
    AtExit->CreateStore(SavedHead, Head);
  }

  // Delete the original allocas (which are no longer used) and the intrinsic
  // calls (which are no longer valid).  Doing this last avoids invalidating
  // iterators.
  for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
    Roots[I].first->eraseFromParent();
    Roots[I].second->eraseFromParent();
  }

  Roots.clear();
  return true;
}
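// Rough shape of the frame this lowers to (a sketch inferred from the GEP
// indices above, not the authoritative type):
//
//   %gc_frame = { { %StackEntry* next,        ; gc_frame.next at (0, 0)
//                   %FrameMap*   map },       ; gc_frame.map  at (0, 1)
//                 <root 0>, <root 1>, ... }   ; gc_root       at (1 + I)
//
// The embedded header ("gc_newhead") is linked onto the global Head list on
// entry and popped again at every escape found by EscapeEnumerator.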
static void convertInstruction(Instruction *Inst, ConversionState &State) {
  if (SExtInst *Sext = dyn_cast<SExtInst>(Inst)) {
    Value *Op = Sext->getOperand(0);
    Value *NewInst = NULL;
    // If the operand to be extended is illegal, we first need to fill its
    // upper bits (which are zero) with its sign bit.
    if (shouldConvert(Op)) {
      NewInst = getSignExtend(State.getConverted(Op), Op, Sext);
    }
    // If the converted type of the operand is the same as the converted
    // type of the result, we won't actually be changing the type of the
    // variable, just its value.
    if (getPromotedType(Op->getType()) != getPromotedType(Sext->getType())) {
      NewInst = new SExtInst(
          NewInst ? NewInst : State.getConverted(Op),
          getPromotedType(cast<IntegerType>(Sext->getType())),
          Sext->getName() + ".sext", Sext);
    }
    // Now all the bits of the result are correct, but we need to restore
    // the bits above its type to zero.
    if (shouldConvert(Sext)) {
      NewInst = getClearUpper(NewInst, Sext->getType(), Sext);
    }
    assert(NewInst && "Failed to convert sign extension");
    State.recordConverted(Sext, NewInst);
  } else if (ZExtInst *Zext = dyn_cast<ZExtInst>(Inst)) {
    Value *Op = Zext->getOperand(0);
    Value *NewInst = NULL;
    // TODO(dschuff): Some of these zexts could be no-ops.
    if (shouldConvert(Op)) {
      NewInst = getClearUpper(State.getConverted(Op),
                              Op->getType(),
                              Zext);
    }
    // If the converted type of the operand is the same as the converted
    // type of the result, we won't actually be changing the type of the
    // variable, just its value.
    if (getPromotedType(Op->getType()) != getPromotedType(Zext->getType())) {
      NewInst = CastInst::CreateZExtOrBitCast(
          NewInst ? NewInst : State.getConverted(Op),
          getPromotedType(cast<IntegerType>(Zext->getType())),
          "", Zext);
    }
    assert(NewInst);
    State.recordConverted(Zext, NewInst);
  } else if (TruncInst *Trunc = dyn_cast<TruncInst>(Inst)) {
    Value *Op = Trunc->getOperand(0);
    Value *NewInst = NULL;
    // If the converted type of the operand is the same as the converted
    // type of the result, we won't actually be changing the type of the
    // variable, just its value.
    if (getPromotedType(Op->getType()) != getPromotedType(Trunc->getType())) {
      NewInst = new TruncInst(
          State.getConverted(Op),
          getPromotedType(cast<IntegerType>(Trunc->getType())),
          State.getConverted(Op)->getName() + ".trunc", Trunc);
    }
    // Restoring the upper-bits-are-zero invariant effectively truncates the
    // value.
    if (shouldConvert(Trunc)) {
      NewInst = getClearUpper(NewInst ? NewInst : Op,
                              Trunc->getType(),
                              Trunc);
    }
    assert(NewInst);
    State.recordConverted(Trunc, NewInst);
  } else if (AllocaInst *Alloc = dyn_cast<AllocaInst>(Inst)) {
    // Don't handle arrays of illegal types, but we could handle an array
    // with size specified as an illegal type, as unlikely as that seems.
    if (shouldConvert(Alloc) && Alloc->isArrayAllocation())
      report_fatal_error("Can't convert arrays of illegal type");
    AllocaInst *NewInst = new AllocaInst(
        getPromotedType(Alloc->getAllocatedType()),
        State.getConverted(Alloc->getArraySize()),
        "", Alloc);
    NewInst->setAlignment(Alloc->getAlignment());
    State.recordConverted(Alloc, NewInst);
  } else if (BitCastInst *BCInst = dyn_cast<BitCastInst>(Inst)) {
    // Only handle pointers. Ints can't be cast to/from other ints
    Type *DestType = shouldConvert(BCInst) ?
        getPromotedType(BCInst->getDestTy()) : BCInst->getDestTy();
    BitCastInst *NewInst = new BitCastInst(
        State.getConverted(BCInst->getOperand(0)),
        DestType,
        "", BCInst);
    State.recordConverted(BCInst, NewInst);
  } else if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
    if (shouldConvert(Load)) {
      splitLoad(Load, State);
    }
  } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
    if (shouldConvert(Store->getValueOperand())) {
      splitStore(Store, State);
    }
  } else if (isa<CallInst>(Inst)) {
    report_fatal_error("can't convert calls with illegal types");
  } else if (BinaryOperator *Binop = dyn_cast<BinaryOperator>(Inst)) {
    Value *NewInst = NULL;
    if (Binop->getOpcode() == Instruction::AShr) {
      // The AShr operand needs to be sign-extended to the promoted size
      // before shifting. Because the sign-extension is implemented with
      // AShr, it can be combined with the original operation.
      Value *Op = Binop->getOperand(0);
      Value *ShiftAmount = NULL;
      APInt SignShiftAmt = APInt(
          getPromotedType(Op->getType())->getIntegerBitWidth(),
          getPromotedType(Op->getType())->getIntegerBitWidth() -
          Op->getType()->getIntegerBitWidth());
      NewInst = BinaryOperator::Create(
          Instruction::Shl,
          State.getConverted(Op),
          ConstantInt::get(getPromotedType(Op->getType()), SignShiftAmt),
          State.getConverted(Op)->getName() + ".getsign",
          Binop);
      if (ConstantInt *C = dyn_cast<ConstantInt>(
              State.getConverted(Binop->getOperand(1)))) {
        ShiftAmount = ConstantInt::get(getPromotedType(Op->getType()),
                                       SignShiftAmt + C->getValue());
      } else {
        ShiftAmount = BinaryOperator::Create(
            Instruction::Add,
            State.getConverted(Binop->getOperand(1)),
            ConstantInt::get(
                getPromotedType(Binop->getOperand(1)->getType()),
                SignShiftAmt),
            State.getConverted(Op)->getName() + ".shamt", Binop);
      }
      NewInst = BinaryOperator::Create(
          Instruction::AShr,
          NewInst,
          ShiftAmount,
          Binop->getName() + ".result", Binop);
    } else {
      // If the original operation is not AShr, just recreate it as usual.
      NewInst = BinaryOperator::Create(
          Binop->getOpcode(),
          State.getConverted(Binop->getOperand(0)),
          State.getConverted(Binop->getOperand(1)),
          Binop->getName() + ".result", Binop);
      if (isa<OverflowingBinaryOperator>(NewInst)) {
        cast<BinaryOperator>(NewInst)->setHasNoUnsignedWrap(
            Binop->hasNoUnsignedWrap());
        cast<BinaryOperator>(NewInst)->setHasNoSignedWrap(
            Binop->hasNoSignedWrap());
      }
    }

    // Now restore the invariant if necessary.
    // This switch also sanity-checks the operation.
    switch (Binop->getOpcode()) {
      case Instruction::And:
      case Instruction::Or:
      case Instruction::Xor:
      case Instruction::LShr:
        // These won't change the upper bits.
        break;
      // These can change the upper bits, unless we are sure they never
      // overflow. So clear them now.
      case Instruction::Add:
      case Instruction::Sub:
        if (!(Binop->hasNoUnsignedWrap() && Binop->hasNoSignedWrap()))
          NewInst = getClearUpper(NewInst, Binop->getType(), Binop);
        break;
      case Instruction::Shl:
        if (!Binop->hasNoUnsignedWrap())
          NewInst = getClearUpper(NewInst, Binop->getType(), Binop);
        break;
      // We modified the upper bits ourselves when implementing AShr
      case Instruction::AShr:
        NewInst = getClearUpper(NewInst, Binop->getType(), Binop);
        break;
      // We should not see FP operators here.
      // We don't handle mul/div.
      case Instruction::FAdd:
      case Instruction::FSub:
      case Instruction::Mul:
      case Instruction::FMul:
      case Instruction::UDiv:
      case Instruction::SDiv:
      case Instruction::FDiv:
      case Instruction::URem:
      case Instruction::SRem:
      case Instruction::FRem:
      case Instruction::BinaryOpsEnd:
        errs() << *Inst << "\n";
        llvm_unreachable("Cannot handle binary operator");
        break;
    }

    State.recordConverted(Binop, NewInst);
  } else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Inst)) {
    Value *Op0, *Op1;
    // For signed compares, operands are sign-extended to their
    // promoted type. For unsigned or equality compares, the comparison
    // is equivalent with the larger type because they are already
    // zero-extended.
    if (Cmp->isSigned()) {
      Op0 = getSignExtend(State.getConverted(Cmp->getOperand(0)),
                          Cmp->getOperand(0),
                          Cmp);
      Op1 = getSignExtend(State.getConverted(Cmp->getOperand(1)),
                          Cmp->getOperand(1),
                          Cmp);
    } else {
      Op0 = State.getConverted(Cmp->getOperand(0));
      Op1 = State.getConverted(Cmp->getOperand(1));
    }
    ICmpInst *NewInst = new ICmpInst(
        Cmp, Cmp->getPredicate(), Op0, Op1, "");
    State.recordConverted(Cmp, NewInst);
  } else if (SelectInst *Select = dyn_cast<SelectInst>(Inst)) {
    SelectInst *NewInst = SelectInst::Create(
        Select->getCondition(),
        State.getConverted(Select->getTrueValue()),
        State.getConverted(Select->getFalseValue()),
        "", Select);
    State.recordConverted(Select, NewInst);
  } else if (PHINode *Phi = dyn_cast<PHINode>(Inst)) {
    PHINode *NewPhi = PHINode::Create(
        getPromotedType(Phi->getType()),
        Phi->getNumIncomingValues(),
        "", Phi);
    for (unsigned I = 0, E = Phi->getNumIncomingValues(); I < E; ++I) {
      NewPhi->addIncoming(State.getConverted(Phi->getIncomingValue(I)),
                          Phi->getIncomingBlock(I));
    }
    State.recordConverted(Phi, NewPhi);
  } else if (SwitchInst *Switch = dyn_cast<SwitchInst>(Inst)) {
    SwitchInst *NewInst = SwitchInst::Create(
        State.getConverted(Switch->getCondition()),
        Switch->getDefaultDest(),
        Switch->getNumCases(),
        Switch);
    for (SwitchInst::CaseIt I = Switch->case_begin(),
         E = Switch->case_end();
         I != E; ++I) {
      // Build a new case from the ranges that map to the successor BB. Each
      // range consists of a high and low value which are typed, so the ranges
      // must be rebuilt and a new case constructed from them.
      IntegersSubset CaseRanges = I.getCaseValueEx();
      IntegersSubsetToBB CaseBuilder;
      for (unsigned RI = 0, RE = CaseRanges.getNumItems(); RI < RE; ++RI) {
        CaseBuilder.add(
            IntItem::fromConstantInt(cast<ConstantInt>(convertConstant(
                CaseRanges.getItem(RI).getLow().toConstantInt()))),
            IntItem::fromConstantInt(cast<ConstantInt>(convertConstant(
                CaseRanges.getItem(RI).getHigh().toConstantInt()))));
      }
      IntegersSubset Case = CaseBuilder.getCase();
      NewInst->addCase(Case, I.getCaseSuccessor());
    }
    Switch->eraseFromParent();
  } else {
    errs() << *Inst << "\n";
    llvm_unreachable("unhandled instruction");
  }
}
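// Illustrative promotion of an illegal-width add (hypothetical IR; the pass
// widens i24 to i32 and re-establishes the upper-bits-are-zero invariant via
// getClearUpper, since a plain add may carry into the padding bits):
//
//   %r = add i24 %a, %b
//     ==>
//   %r.result = add i32 %a32, %b32
//   %r.clear  = and i32 %r.result, 16777215   ; 0xFFFFFF = 2^24 - 1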
static Value *julia_to_native(Type *ty, jl_value_t *jt, Value *jv,
                              jl_value_t *aty, bool addressOf,
                              bool byRef, bool inReg,
                              bool needCopy,
                              int argn, jl_codectx_t *ctx,
                              bool *needStackRestore)
{
    Type *vt = jv->getType();

    // We're passing Any
    if (ty == jl_pvalue_llvmt) {
        return boxed(jv,ctx);
    }
    if (ty == vt && !addressOf && !byRef) {
        return jv;
    }
    if (vt != jl_pvalue_llvmt) {
        // argument value is unboxed
        if (addressOf || (byRef && inReg)) {
            if (ty->isPointerTy() && ty->getContainedType(0)==vt) {
                // pass the address of an alloca'd thing, not a box
                // since those are immutable.
                *needStackRestore = true;
                Value *slot = builder.CreateAlloca(vt);
                builder.CreateStore(jv, slot);
                return builder.CreateBitCast(slot, ty);
            }
        }
        else if ((vt->isIntegerTy() && ty->isIntegerTy()) ||
                 (vt->isFloatingPointTy() && ty->isFloatingPointTy()) ||
                 (vt->isPointerTy() && ty->isPointerTy())) {
            if (vt->getPrimitiveSizeInBits() == ty->getPrimitiveSizeInBits()) {
                if (!byRef) {
                    return builder.CreateBitCast(jv, ty);
                }
                else {
                    *needStackRestore = true;
                    Value *mem = builder.CreateAlloca(ty);
                    builder.CreateStore(jv, builder.CreateBitCast(mem, vt->getPointerTo()));
                    return mem;
                }
            }
        }
        else if (vt->isStructTy()) {
            if (!byRef) {
                return jv;
            }
            else {
                *needStackRestore = true;
                Value *mem = builder.CreateAlloca(vt);
                builder.CreateStore(jv, mem);
                return mem;
            }
        }
        emit_error("ccall: argument type did not match declaration", ctx);
    }
    if (jl_is_tuple(jt)) {
        return emit_unbox(ty,jv,jt);
    }
    if (jl_is_cpointer_type(jt) && addressOf) {
        assert(ty->isPointerTy());
        jl_value_t *ety = jl_tparam0(jt);
        if (aty != ety && ety != (jl_value_t*)jl_any_type &&
            jt != (jl_value_t*)jl_voidpointer_type) {
            std::stringstream msg;
            msg << "ccall argument ";
            msg << argn;
            emit_typecheck(jv, ety, msg.str(), ctx);
        }
        if (jl_is_mutable_datatype(ety)) {
            // no copy, just reference the data field
            return builder.CreateBitCast(jv, ty);
        }
        else if (jl_is_immutable_datatype(ety) &&
                 jt != (jl_value_t*)jl_voidpointer_type) {
            // yes copy
            Value *nbytes;
            if (jl_is_leaf_type(ety))
                nbytes = ConstantInt::get(T_int32, jl_datatype_size(ety));
            else
                nbytes = tbaa_decorate(tbaa_datatype, builder.CreateLoad(
                            builder.CreateGEP(builder.CreatePointerCast(emit_typeof(jv), T_pint32),
                                ConstantInt::get(T_size, offsetof(jl_datatype_t,size)/sizeof(int32_t))),
                            false));
            *needStackRestore = true;
            AllocaInst *ai = builder.CreateAlloca(T_int8, nbytes);
            ai->setAlignment(16);
            builder.CreateMemCpy(ai, builder.CreateBitCast(jv, T_pint8), nbytes, 1);
            return builder.CreateBitCast(ai, ty);
        }
        // emit maybe copy
        *needStackRestore = true;
        Value *jvt = emit_typeof(jv);
        BasicBlock *mutableBB = BasicBlock::Create(getGlobalContext(),"is-mutable",ctx->f);
        BasicBlock *immutableBB = BasicBlock::Create(getGlobalContext(),"is-immutable",ctx->f);
        BasicBlock *afterBB = BasicBlock::Create(getGlobalContext(),"after",ctx->f);
        Value *ismutable = builder.CreateTrunc(
                tbaa_decorate(tbaa_datatype, builder.CreateLoad(
                        builder.CreateGEP(builder.CreatePointerCast(jvt, T_pint8),
                            ConstantInt::get(T_size, offsetof(jl_datatype_t,mutabl))),
                        false)),
                T_int1);
        builder.CreateCondBr(ismutable, mutableBB, immutableBB);
        builder.SetInsertPoint(mutableBB);
        Value *p1 = builder.CreatePointerCast(jv, ty);
        builder.CreateBr(afterBB);
        builder.SetInsertPoint(immutableBB);
        Value *nbytes = tbaa_decorate(tbaa_datatype, builder.CreateLoad(
                    builder.CreateGEP(builder.CreatePointerCast(jvt, T_pint32),
                        ConstantInt::get(T_size, offsetof(jl_datatype_t,size)/sizeof(int32_t))),
                    false));
        AllocaInst *ai = builder.CreateAlloca(T_int8, nbytes);
        ai->setAlignment(16);
        builder.CreateMemCpy(ai, builder.CreatePointerCast(jv, T_pint8), nbytes, 1);
        Value *p2 = builder.CreatePointerCast(ai, ty);
        builder.CreateBr(afterBB);
        builder.SetInsertPoint(afterBB);
        PHINode *p = builder.CreatePHI(ty, 2);
        p->addIncoming(p1, mutableBB);
        p->addIncoming(p2, immutableBB);
        return p;
    }
    if (addressOf)
        jl_error("ccall: unexpected & on argument");

    // the only "safe" thing to emit here is the expected struct
    assert(jl_is_datatype(jt));
    if (aty != jt) {
        std::stringstream msg;
        msg << "ccall argument ";
        msg << argn;
        emit_typecheck(jv, jt, msg.str(), ctx);
    }
    Value *p = data_pointer(jv);
    Value *pjv = builder.CreatePointerCast(p, PointerType::get(ty,0));
    if (byRef) {
        if (!needCopy) {
            return pjv;
        }
        else {
            *needStackRestore = true;
            Value *mem = builder.CreateAlloca(ty);
            builder.CreateMemCpy(mem, pjv, (uint64_t)jl_datatype_size(jt),
                                 (uint64_t)((jl_datatype_t*)jt)->alignment);
            return mem;
        }
    }
    else {
        return builder.CreateLoad(pjv, false);
    }
}
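// Example of a call that takes the "maybe copy" path above (hypothetical
// Julia snippet, pre-1.0 ccall syntax): passing a value of unknown concrete
// type by address forces a runtime branch on mutability before handing a
// pointer to C:
//
//   x = some_value()                       # inferred as Any
//   ccall(:c_fn, Void, (Ptr{MyT},), &x)
//
// Mutable values are passed by their data pointer directly; immutable ones
// are first memcpy'd into a 16-byte-aligned stack slot.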
/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
/// and checks for the possibility of a call slot optimization by having
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
                                     Value *cpyDest, Value *cpySrc,
                                     uint64_t cpyLen, unsigned cpyAlign,
                                     CallInst *C) {
  // The general transformation to keep in mind is
  //
  //   call @func(..., src, ...)
  //   memcpy(dest, src, ...)
  //
  // ->
  //
  //   memcpy(dest, src, ...)
  //   call @func(..., dest, ...)
  //
  // Since moving the memcpy is technically awkward, we additionally check that
  // src only holds uninitialized values at the moment of the call, meaning that
  // the memcpy can be discarded rather than moved.

  // Deliberately get the source and destination with bitcasts stripped away,
  // because we'll need to do type comparisons based on the underlying type.
  CallSite CS(C);

  // Require that src be an alloca.  This simplifies the reasoning considerably.
  AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
  if (!srcAlloca)
    return false;

  // Check that all of src is copied to dest.
  if (!DL) return false;

  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
  if (!srcArraySize)
    return false;

  uint64_t srcSize = DL->getTypeAllocSize(srcAlloca->getAllocatedType()) *
    srcArraySize->getZExtValue();

  if (cpyLen < srcSize)
    return false;

  // Check that accessing the first srcSize bytes of dest will not cause a
  // trap.  Otherwise the transform is invalid since it might cause a trap
  // to occur earlier than it otherwise would.
  if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
    // The destination is an alloca.  Check it is larger than srcSize.
    ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
    if (!destArraySize)
      return false;

    uint64_t destSize = DL->getTypeAllocSize(A->getAllocatedType()) *
      destArraySize->getZExtValue();

    if (destSize < srcSize)
      return false;
  } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
    // If the destination is an sret parameter then only accesses that are
    // outside of the returned struct type can trap.
    if (!A->hasStructRetAttr())
      return false;

    Type *StructTy = cast<PointerType>(A->getType())->getElementType();
    if (!StructTy->isSized()) {
      // The call may never return and hence the copy-instruction may never
      // be executed, and therefore it's not safe to say "the destination
      // has at least <cpyLen> bytes, as implied by the copy-instruction".
      return false;
    }

    uint64_t destSize = DL->getTypeAllocSize(StructTy);
    if (destSize < srcSize)
      return false;
  } else {
    return false;
  }

  // Check that dest points to memory that is at least as aligned as src.
  unsigned srcAlign = srcAlloca->getAlignment();
  if (!srcAlign)
    srcAlign = DL->getABITypeAlignment(srcAlloca->getAllocatedType());
  bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
  // If dest is not aligned enough and we can't increase its alignment then
  // bail out.
  if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest))
    return false;

  // Check that src is not accessed except via the call and the memcpy.  This
  // guarantees that it holds only undefined values when passed in (so the final
  // memcpy can be dropped), that it is not read or written between the call and
  // the memcpy, and that writing beyond the end of it is undefined.
  SmallVector<User*, 8> srcUseList(srcAlloca->user_begin(),
                                   srcAlloca->user_end());
  while (!srcUseList.empty()) {
    User *U = srcUseList.pop_back_val();

    if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
      for (User *UU : U->users())
        srcUseList.push_back(UU);
    } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
      if (G->hasAllZeroIndices())
        for (User *UU : U->users())
          srcUseList.push_back(UU);
      else
        return false;
    } else if (U != C && U != cpy) {
      return false;
    }
  }

  // Check that src isn't captured by the called function since the
  // transformation can cause aliasing issues in that case.
  for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
    if (CS.getArgument(i) == cpySrc && !CS.doesNotCapture(i))
      return false;

  // Since we're changing the parameter to the callsite, we need to make sure
  // that what would be the new parameter dominates the callsite.
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
    if (!DT.dominates(cpyDestInst, C))
      return false;

  // In addition to knowing that the call does not access src in some
  // unexpected manner, for example via a global, which we deduce from
  // the use analysis, we also need to know that it does not sneakily
  // access dest.  We rely on AA to figure this out for us.
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  AliasAnalysis::ModRefResult MR = AA.getModRefInfo(C, cpyDest, srcSize);
  // If necessary, perform additional analysis.
  if (MR != AliasAnalysis::NoModRef)
    MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT);
  if (MR != AliasAnalysis::NoModRef)
    return false;

  // All the checks have passed, so do the transformation.
  bool changedArgument = false;
  for (unsigned i = 0; i < CS.arg_size(); ++i)
    if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
      Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest
        : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
                                      cpyDest->getName(), C);
      changedArgument = true;
      if (CS.getArgument(i)->getType() == Dest->getType())
        CS.setArgument(i, Dest);
      else
        CS.setArgument(i, CastInst::CreatePointerCast(Dest,
                          CS.getArgument(i)->getType(), Dest->getName(), C));
    }

  if (!changedArgument)
    return false;

  // If the destination wasn't sufficiently aligned then increase its alignment.
  if (!isDestSufficientlyAligned) {
    assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
    cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
  }

  // Drop any cached information about the call, because we may have changed
  // its dependence information by changing its parameter.
  MD->removeInstruction(C);

  // Remove the memcpy.
  MD->removeInstruction(cpy);
  ++NumMemCpyInstr;

  return true;
}
static int initEnv(Module *mainModule) {
  /*
    nArgcP = alloc oldArgc->getType()
    nArgvV = alloc oldArgv->getType()
    store oldArgc nArgcP
    store oldArgv nArgvP
    klee_init_environment(nArgcP, nArgvP)
    nArgc = load nArgcP
    nArgv = load nArgvP
    oldArgc->replaceAllUsesWith(nArgc)
    oldArgv->replaceAllUsesWith(nArgv)
  */

  Function *mainFn = mainModule->getFunction(EntryPoint);
  if (mainFn->arg_size() < 2) {
    klee_error("Cannot handle \"--posix-runtime\" when main() has less than "
               "two arguments.\n");
  }

  Instruction *firstInst = mainFn->begin()->begin();

  Value *oldArgc = mainFn->arg_begin();
  Value *oldArgv = ++mainFn->arg_begin();

  AllocaInst *argcPtr = new AllocaInst(oldArgc->getType(), "argcPtr",
                                       firstInst);
  AllocaInst *argvPtr = new AllocaInst(oldArgv->getType(), "argvPtr",
                                       firstInst);

  /* Insert void klee_init_env(int* argc, char*** argv) */
  std::vector<const Type*> params;
  params.push_back(Type::getInt32Ty(getGlobalContext()));
  params.push_back(Type::getInt32Ty(getGlobalContext()));
  Function *initEnvFn =
    cast<Function>(mainModule->getOrInsertFunction("klee_init_env",
                                                   Type::getVoidTy(getGlobalContext()),
                                                   argcPtr->getType(),
                                                   argvPtr->getType(),
                                                   NULL));
  assert(initEnvFn);
  std::vector<Value*> args;
  args.push_back(argcPtr);
  args.push_back(argvPtr);
#if LLVM_VERSION_CODE >= LLVM_VERSION(3, 0)
  Instruction *initEnvCall = CallInst::Create(initEnvFn, args,
                                              "", firstInst);
#else
  Instruction *initEnvCall = CallInst::Create(initEnvFn,
                                              args.begin(), args.end(),
                                              "", firstInst);
#endif
  Value *argc = new LoadInst(argcPtr, "newArgc", firstInst);
  Value *argv = new LoadInst(argvPtr, "newArgv", firstInst);

  oldArgc->replaceAllUsesWith(argc);
  oldArgv->replaceAllUsesWith(argv);

  // The stores are created after the RAUW on purpose, so that they still
  // refer to the original argument values; they are inserted *before*
  // initEnvCall (the insert-before parameter), so the runtime call sees the
  // stored values.
  new StoreInst(oldArgc, argcPtr, initEnvCall);
  new StoreInst(oldArgv, argvPtr, initEnvCall);

  return 0;
}
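// For reference, a sketch of main()'s rewritten entry block (value names
// follow the code above; pointer types use the typed-pointer syntax of this
// LLVM era):
//
//   %argcPtr = alloca i32
//   %argvPtr = alloca i8**
//   store i32 %argc, i32* %argcPtr
//   store i8** %argv, i8*** %argvPtr
//   call void @klee_init_env(i32* %argcPtr, i8*** %argvPtr)
//   %newArgc = load i32* %argcPtr
//   %newArgv = load i8*** %argvPtr
//
// All later uses of the original argc/argv now read whatever values
// klee_init_env wrote back through the two allocas.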
//
// Method: insertBadAllocationSizes()
//
// Description:
//  This method will look for allocations and change their size to be
//  incorrect.  It does the following:
//    o) Changes the number of array elements allocated by alloca and malloc.
//
// Return value:
//  true  - The module was modified.
//  false - The module was left unmodified.
//
bool FaultInjector::insertBadAllocationSizes(Function &F) {
  // Worklist of allocation sites to rewrite
  std::vector<AllocaInst *> WorkList;

  for (Function::iterator fI = F.begin(), fE = F.end(); fI != fE; ++fI) {
    BasicBlock &BB = *fI;
    for (BasicBlock::iterator I = BB.begin(), bE = BB.end(); I != bE; ++I) {
      if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
        if (AI->isArrayAllocation()) {
          // Skip if we should not insert a fault.
          if (!doFault()) continue;

          WorkList.push_back(AI);
        }
      }
    }
  }

  while (WorkList.size()) {
    AllocaInst *AI = WorkList.back();
    WorkList.pop_back();

    //
    // Print information about where the fault is being inserted.
    //
    printSourceInfo("Bad allocation size", AI);

    Instruction *NewAlloc = 0;
    NewAlloc = new AllocaInst(AI->getAllocatedType(),
                              ConstantInt::get(Int32Type, 0),
                              AI->getAlignment(),
                              AI->getName(),
                              AI);
    AI->replaceAllUsesWith(NewAlloc);
    AI->eraseFromParent();
    ++BadSizes;
  }

  //
  // Try harder to make bad allocation sizes.
  //
  WorkList.clear();
  for (Function::iterator fI = F.begin(), fE = F.end(); fI != fE; ++fI) {
    BasicBlock &BB = *fI;
    for (BasicBlock::iterator I = BB.begin(), bE = BB.end(); I != bE; ++I) {
      if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
        //
        // Determine if this is a data type that we can make smaller.
        //
        if (((TD->getTypeAllocSize(AI->getAllocatedType())) > 4) && doFault()) {
          WorkList.push_back(AI);
        }
      }
    }
  }

  //
  // Replace these allocations with an allocation of an integer and cast the
  // result back into the appropriate type.
  //
  while (WorkList.size()) {
    AllocaInst *AI = WorkList.back();
    WorkList.pop_back();

    Instruction *NewAlloc = 0;
    NewAlloc = new AllocaInst(Int32Type,
                              AI->getArraySize(),
                              AI->getAlignment(),
                              AI->getName(),
                              AI);
    NewAlloc = castTo(NewAlloc, AI->getType(), "", AI);
    AI->replaceAllUsesWith(NewAlloc);
    AI->eraseFromParent();
    ++BadSizes;
  }

  return (BadSizes > 0);
}
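// The effect of the two rewrites on concrete allocas, as an illustrative
// sketch (names are hypothetical):
//
//   %buf = alloca i32, i32 10      ; first pass: array count zeroed out
//     -->  %buf = alloca i32, i32 0
//
//   %obj = alloca %struct.Big      ; second pass: sized type shrunk to i32
//     -->  %t = alloca i32
//          %obj = bitcast i32* %t to %struct.Big*
//
// In both cases every existing use still type-checks, but loads and stores
// through the pointer now touch memory that was never actually reserved,
// which is exactly the fault being injected.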
bool TypeChecksOpt::runOnModule(Module &M) {
  TS = &getAnalysis<dsa::TypeSafety<TDDataStructures> >();

  // Create the necessary prototypes
  VoidTy = IntegerType::getVoidTy(M.getContext());
  Int8Ty = IntegerType::getInt8Ty(M.getContext());
  Int32Ty = IntegerType::getInt32Ty(M.getContext());
  Int64Ty = IntegerType::getInt64Ty(M.getContext());
  VoidPtrTy = PointerType::getUnqual(Int8Ty);
  TypeTagTy = Int8Ty;
  TypeTagPtrTy = PointerType::getUnqual(TypeTagTy);

  Constant *memsetF = M.getOrInsertFunction("llvm.memset.i64", VoidTy,
                                            VoidPtrTy, Int8Ty, Int64Ty,
                                            Int32Ty, NULL);
  trackGlobal = M.getOrInsertFunction("trackGlobal",
                                      VoidTy,
                                      VoidPtrTy, /*ptr*/
                                      TypeTagTy, /*type*/
                                      Int64Ty,   /*size*/
                                      Int32Ty,   /*tag*/
                                      NULL);
  trackInitInst = M.getOrInsertFunction("trackInitInst",
                                        VoidTy,
                                        VoidPtrTy, /*ptr*/
                                        Int64Ty,   /*size*/
                                        Int32Ty,   /*tag*/
                                        NULL);
  trackUnInitInst = M.getOrInsertFunction("trackUnInitInst",
                                          VoidTy,
                                          VoidPtrTy, /*ptr*/
                                          Int64Ty,   /*size*/
                                          Int32Ty,   /*tag*/
                                          NULL);
  trackStoreInst = M.getOrInsertFunction("trackStoreInst",
                                         VoidTy,
                                         VoidPtrTy, /*ptr*/
                                         TypeTagTy, /*type*/
                                         Int64Ty,   /*size*/
                                         Int32Ty,   /*tag*/
                                         NULL);
  checkTypeInst = M.getOrInsertFunction("checkType",
                                        VoidTy,
                                        TypeTagTy, /*type*/
                                        Int64Ty,   /*size*/
                                        TypeTagPtrTy,
                                        VoidPtrTy, /*ptr*/
                                        Int32Ty,   /*tag*/
                                        NULL);
  copyTypeInfo = M.getOrInsertFunction("copyTypeInfo",
                                       VoidTy,
                                       VoidPtrTy, /*dest ptr*/
                                       VoidPtrTy, /*src ptr*/
                                       Int64Ty,   /*size*/
                                       Int32Ty,   /*tag*/
                                       NULL);
  setTypeInfo = M.getOrInsertFunction("setTypeInfo",
                                      VoidTy,
                                      VoidPtrTy,    /*dest ptr*/
                                      TypeTagPtrTy, /*metadata*/
                                      Int64Ty,      /*size*/
                                      TypeTagTy,
                                      VoidPtrTy,
                                      Int32Ty,      /*tag*/
                                      NULL);
  trackStringInput = M.getOrInsertFunction("trackStringInput",
                                           VoidTy,
                                           VoidPtrTy,
                                           Int32Ty,
                                           NULL);
  getTypeTag = M.getOrInsertFunction("getTypeTag",
                                     VoidTy,
                                     VoidPtrTy,    /*ptr*/
                                     Int64Ty,      /*size*/
                                     TypeTagPtrTy, /*dest for type tag*/
                                     Int32Ty,      /*tag*/
                                     NULL);
  MallocFunc = M.getFunction("malloc");

  for (Value::use_iterator User = trackGlobal->use_begin();
       User != trackGlobal->use_end(); ++User) {
    CallInst *CI = dyn_cast<CallInst>(*User);
    assert(CI);
    if (TS->isTypeSafe(CI->getOperand(1)->stripPointerCasts(),
                       CI->getParent()->getParent())) {
      std::vector<Value*> Args;
      Args.push_back(CI->getOperand(1));
      Args.push_back(CI->getOperand(3));
      Args.push_back(CI->getOperand(4));
      CallInst::Create(trackInitInst, Args, "", CI);
      toDelete.push_back(CI);
    }
  }

  for (Value::use_iterator User = checkTypeInst->use_begin();
       User != checkTypeInst->use_end(); ++User) {
    CallInst *CI = dyn_cast<CallInst>(*User);
    assert(CI);
    if (TS->isTypeSafe(CI->getOperand(4)->stripPointerCasts(),
                       CI->getParent()->getParent())) {
      toDelete.push_back(CI);
    }
  }

  for (Value::use_iterator User = trackStoreInst->use_begin();
       User != trackStoreInst->use_end(); ++User) {
    CallInst *CI = dyn_cast<CallInst>(*User);
    assert(CI);
    if (TS->isTypeSafe(CI->getOperand(1)->stripPointerCasts(),
                       CI->getParent()->getParent())) {
      toDelete.push_back(CI);
    }
  }

  // For allocas that are type-known, assume they are initialized with TOP.
  // The iterator is advanced before the call is inspected because the loop
  // body may retarget the call, invalidating the current use.
  for (Value::use_iterator User = trackUnInitInst->use_begin();
       User != trackUnInitInst->use_end(); ) {
    CallInst *CI = dyn_cast<CallInst>(*(User++));
    assert(CI);
    // Check whether the operand is an alloca instruction.
    if (TS->isTypeSafe(CI->getOperand(1)->stripPointerCasts(),
                       CI->getParent()->getParent())) {
      CI->setCalledFunction(trackInitInst);
      if (AllocaInst *AI =
            dyn_cast<AllocaInst>(CI->getOperand(1)->stripPointerCasts())) {
        // Initialize the allocation to NULL
        std::vector<Value *> Args2;
        Args2.push_back(CI->getOperand(1));
        Args2.push_back(ConstantInt::get(Int8Ty, 0));
        Args2.push_back(CI->getOperand(2));
        Args2.push_back(ConstantInt::get(Int32Ty, AI->getAlignment()));
        CallInst::Create(memsetF, Args2, "", CI);
      }
    }
  }

  if (MallocFunc) {
    for (Value::use_iterator User = MallocFunc->use_begin();
         User != MallocFunc->use_end(); ++User) {
      CallInst *CI = dyn_cast<CallInst>(*User);
      if (!CI)
        continue;
      if (TS->isTypeSafe(CI, CI->getParent()->getParent())) {
        CastInst *BCI = BitCastInst::CreatePointerCast(CI, VoidPtrTy);
        CastInst *Size = CastInst::CreateSExtOrBitCast(CI->getOperand(1),
                                                       Int64Ty);
        Size->insertAfter(CI);
        BCI->insertAfter(Size);
        std::vector<Value *> Args;
        Args.push_back(BCI);
        Args.push_back(Size);
        Args.push_back(ConstantInt::get(Int32Ty, 0));
        CallInst *CINew = CallInst::Create(trackInitInst, Args);
        CINew->insertAfter(BCI);
      }
    }
  }

  // TODO: Handle other allocators (calloc and custom allocators) the same
  // way?
  for (Value::use_iterator User = copyTypeInfo->use_begin();
       User != copyTypeInfo->use_end(); ++User) {
    CallInst *CI = dyn_cast<CallInst>(*User);
    assert(CI);
    if (TS->isTypeSafe(CI->getOperand(1)->stripPointerCasts(),
                       CI->getParent()->getParent())) {
      std::vector<Value*> Args;
      Args.push_back(CI->getOperand(1));
      Args.push_back(CI->getOperand(3)); // size
      Args.push_back(CI->getOperand(4));
      CallInst::Create(trackInitInst, Args, "", CI);
      toDelete.push_back(CI);
    }
  }

  for (Value::use_iterator User = setTypeInfo->use_begin();
       User != setTypeInfo->use_end(); ++User) {
    CallInst *CI = dyn_cast<CallInst>(*User);
    assert(CI);
    if (TS->isTypeSafe(CI->getOperand(1)->stripPointerCasts(),
                       CI->getParent()->getParent())) {
      std::vector<Value*> Args;
      Args.push_back(CI->getOperand(1));
      Args.push_back(CI->getOperand(3)); // size
      Args.push_back(CI->getOperand(6));
      CallInst::Create(trackInitInst, Args, "", CI);
      toDelete.push_back(CI);
    }
  }

  for (Value::use_iterator User = getTypeTag->use_begin();
       User != getTypeTag->use_end(); ++User) {
    CallInst *CI = dyn_cast<CallInst>(*User);
    assert(CI);
    if (TS->isTypeSafe(CI->getOperand(1)->stripPointerCasts(),
                       CI->getParent()->getParent())) {
      AllocaInst *AI =
        dyn_cast<AllocaInst>(CI->getOperand(3)->stripPointerCasts());
      assert(AI);
      std::vector<Value*> Args;
      Args.push_back(CI->getOperand(3));
      Args.push_back(ConstantInt::get(Int8Ty, 255));
      Args.push_back(CI->getOperand(2));
      Args.push_back(ConstantInt::get(Int32Ty, AI->getAlignment()));
      CallInst::Create(memsetF, Args, "", CI);
      toDelete.push_back(CI);
    }
  }

  numSafe += toDelete.size();

  while (!toDelete.empty()) {
    Instruction *I = toDelete.back();
    toDelete.pop_back();
    I->eraseFromParent();
  }

  return (numSafe > 0);
}
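// As a sketch, the trackUnInitInst rewrite above turns this (names are
// illustrative):
//
//   call void @trackUnInitInst(i8* %p, i64 %size, i32 %tag)
//
// into this, when %p is proved type-safe and comes from an alloca:
//
//   call void @llvm.memset.i64(i8* %p, i8 0, i64 %size, i32 %align)
//   call void @trackInitInst(i8* %p, i64 %size, i32 %tag)
//
// i.e. once the DSA-based analysis proves the memory is only ever used at a
// single type, the per-byte type metadata update can be downgraded to a
// cheap "initialized" record, with a zero fill standing in for the unknown
// initial contents.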
void PromoteMem2Reg::run() {
  Function &F = *DT.getRoot()->getParent();

  if (AST) PointerAllocaValues.resize(Allocas.size());
  AllocaDbgDeclares.resize(Allocas.size());

  AllocaInfo Info;
  LargeBlockInfo LBI;

  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function, which is same as DF!");

    removeLifetimeIntrinsicUsers(AI);

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      if (AST) AST->deleteValue(AI);
      AI->eraseFromParent();

      // Remove the alloca from the Allocas list, since it has been processed
      RemoveFromAllocasList(AllocaNum);
      ++NumDeadAlloca;
      continue;
    }

    // Calculate the set of read and write-locations for each alloca.  This
    // is analogous to finding the 'uses' and 'definitions' of each variable.
    Info.AnalyzeAlloca(AI);

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      RewriteSingleStoreAlloca(AI, Info, LBI);

      // Finally, after the scan, check to see if the store is all that is
      // left.
      if (Info.UsingBlocks.empty()) {
        // Record debuginfo for the store and remove the declaration's
        // debuginfo.
        if (DbgDeclareInst *DDI = Info.DbgDeclare) {
          if (!DIB)
            DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent());
          ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB);
          DDI->eraseFromParent();
        }
        // Remove the (now dead) store and alloca.
        Info.OnlyStore->eraseFromParent();
        LBI.deleteValue(Info.OnlyStore);

        if (AST) AST->deleteValue(AI);
        AI->eraseFromParent();
        LBI.deleteValue(AI);

        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);

        ++NumSingleStore;
        continue;
      }
    }

    // If the alloca is only read and written in one basic block, just
    // perform a linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock) {
      PromoteSingleBlockAlloca(AI, Info, LBI);

      // Finally, after the scan, check to see if the stores are all that is
      // left.
      if (Info.UsingBlocks.empty()) {
        // Remove the (now dead) stores and alloca.
        while (!AI->use_empty()) {
          StoreInst *SI = cast<StoreInst>(AI->use_back());
          // Record debuginfo for the store before removing it.
          if (DbgDeclareInst *DDI = Info.DbgDeclare) {
            if (!DIB)
              DIB = new DIBuilder(*SI->getParent()->getParent()->getParent());
            ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
          }
          SI->eraseFromParent();
          LBI.deleteValue(SI);
        }

        if (AST) AST->deleteValue(AI);
        AI->eraseFromParent();
        LBI.deleteValue(AI);

        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);

        // The alloca's debuginfo can be removed as well.
        if (DbgDeclareInst *DDI = Info.DbgDeclare)
          DDI->eraseFromParent();

        ++NumLocalPromoted;
        continue;
      }
    }

    // If we haven't computed dominator tree levels, do so now.
    if (DomLevels.empty()) {
      SmallVector<DomTreeNode*, 32> Worklist;

      DomTreeNode *Root = DT.getRootNode();
      DomLevels[Root] = 0;
      Worklist.push_back(Root);

      while (!Worklist.empty()) {
        DomTreeNode *Node = Worklist.pop_back_val();
        unsigned ChildLevel = DomLevels[Node] + 1;
        for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
             CI != CE; ++CI) {
          DomLevels[*CI] = ChildLevel;
          Worklist.push_back(*CI);
        }
      }
    }

    // If we haven't computed a numbering for the BB's in the function, do so
    // now.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
        BBNumbers[I] = ID++;
    }

    // If we have an AST to keep updated, remember some pointer value that is
    // stored into the alloca.
    if (AST)
      PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;

    // Remember the dbg.declare intrinsic describing this alloca, if any.
    if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;

    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // At this point, we're committed to promoting the alloca using IDF's,
    // and the standard SSA construction algorithm.  Determine which blocks
    // need PHI nodes and see if we can optimize out some work by avoiding
    // insertion of dead phi nodes.
    DetermineInsertionPoint(AI, AllocaNum, Info);
  }

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  LBI.clear();

  // Set the incoming values for the basic block to be null values for all of
  // the alloca's.  We do this in case there is a load of a value that has
  // not been stored yet.  In this case, it will get this null value.
  //
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());

  // Walk all basic blocks in the function, performing the SSA rename
  // algorithm and inserting the phi nodes we marked as necessary.
  //
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
  do {
    RenamePassData RPD;
    RPD.swap(RenamePassWorkList.back());
    RenamePassWorkList.pop_back();
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
  } while (!RenamePassWorkList.empty());

  // The renamer uses the Visited set to avoid infinite loops.  Clear it now.
  Visited.clear();

  // Remove the allocas themselves from the function.
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
    Instruction *A = Allocas[i];

    // If there are any uses of the alloca instructions left, they must be in
    // unreachable basic blocks that were not processed by walking the
    // dominator tree.  Just delete the users now.
    if (!A->use_empty())
      A->replaceAllUsesWith(UndefValue::get(A->getType()));
    if (AST) AST->deleteValue(A);
    A->eraseFromParent();
  }

  // Remove the allocas' dbg.declare intrinsics from the function.
  for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
    if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
      DDI->eraseFromParent();

  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values.  This can
  // happen due to undef values coming into the PHI nodes.  This process is
  // iterative, because eliminating one PHI node can cause others to be
  // removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;

    // Iterating over NewPhiNodes is deterministic, so it is safe to try to
    // simplify and RAUW them as we go.  If it was not, we could add uses to
    // the values we replace with in a non-deterministic order, thus creating
    // non-deterministic def->use chains.
    for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator
           I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
        if (AST && PN->getType()->isPointerTy())
          AST->deleteValue(PN);
        PN->replaceAllUsesWith(V);
        PN->eraseFromParent();
        NewPhiNodes.erase(I++);
        EliminatedAPHI = true;
        continue;
      }
      ++I;
    }
  }

  // At this point, the renamer has added entries to PHI nodes for all
  // reachable code.  Unfortunately, there may be unreachable blocks which
  // the renamer hasn't traversed.  If this is the case, the PHI nodes may
  // not have incoming values for all predecessors.  Loop over all PHI nodes
  // we have created, inserting undef values if they are missing any incoming
  // values.
  //
  for (DenseMap<std::pair<unsigned, unsigned>, PHINode*>::iterator
         I = NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
    // We want to do this once per basic block.  As such, only process a
    // block when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if the PHI nodes are missing incoming values.  We
    // know that all PHI nodes that were inserted in a block will have the
    // same number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks.  Start by sorting the incoming predecessors for
    // efficient access.
    std::sort(Preds.begin(), Preds.end());

    // Now we loop through all BB's which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVector<BasicBlock*, 16>::iterator EntIt =
        std::lower_bound(Preds.begin(), Preds.end(),
                         SomePHI->getIncomingBlock(i));
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the preds list must have dummy
    // entries inserted into every PHI node for the block.  Update all the
    // phi nodes in this block that we are inserting (there could be phis
    // before mem2reg runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
        SomePHI->addIncoming(UndefVal, Preds[pred]);
    }
  }

  NewPhiNodes.clear();
}
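// The overall effect of run() on a simple diamond, sketched in IR (names
// illustrative):
//
//   entry:                                   entry:
//     %x = alloca i32                          br i1 %c, label %t, label %f
//     store i32 1, i32* %x              -->  t:
//     br i1 %c, label %t, label %f             br label %join
//   t:                                       f:
//     store i32 2, i32* %x                     br label %join
//     br label %join                         join:
//   f:                                         %v = phi i32 [ 2, %t ],
//     br label %join                                        [ 1, %f ]
//   join:
//     %v = load i32* %x
//
// The alloca, the stores, and the load are all deleted; the phi inserted at
// the dominance frontier of the defining blocks carries the reaching
// definition instead.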
//
// Method: TransformCSSAllocasToMallocs()
//
// Description:
//  This method is given the set of DSNodes from the stack safety pass that
//  have been marked for promotion.  It then finds all alloca instructions
//  that have not been marked type-unknown and promotes them to heap
//  allocations.
//
void
ConvertUnsafeAllocas::TransformCSSAllocasToMallocs(Module &M,
                                                   std::set<DSNode *> &cssAllocaNodes) {
  for (Module::iterator FI = M.begin(); FI != M.end(); ++FI) {
    //
    // Skip functions that have no DSGraph.  These are probably functions
    // with no function body and, hence, cannot be analyzed.
    //
    if (!(budsPass->hasDSGraph(*FI)))
      continue;

    //
    // Get the DSGraph for the current function.
    //
    DSGraph *DSG = budsPass->getDSGraph(*FI);

    //
    // Search for alloca instructions that need promotion and add them to the
    // worklist.
    //
    std::vector<AllocaInst *> Worklist;
    for (Function::iterator BB = FI->begin(); BB != FI->end(); ++BB) {
      for (BasicBlock::iterator ii = BB->begin(); ii != BB->end(); ++ii) {
        Instruction *I = ii;
        if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
          //
          // Get the DSNode for the allocation.
          //
          DSNode *DSN = DSG->getNodeForValue(AI).getNode();
          assert(DSN && "No DSNode for alloca!\n");

          //
          // If the alloca is type-unknown (i.e., its DSNode is completely
          // folded), we do not promote it, so don't bother with it.
          //
          if (DSN->isNodeCompletelyFolded())
            continue;

          //
          // Determine if the DSNode for the alloca is one of those marked as
          // unsafe by the stack safety analysis pass.  If not, then we do
          // not need to promote it.
          //
          if (cssAllocaNodes.find(DSN) == cssAllocaNodes.end())
            continue;

          //
          // If the DSNode for this alloca is already listed in the
          // unsafeAllocaNode vector, remove it since we are processing it
          // here.
          //
          std::list<DSNode *>::iterator NodeI =
            find(unsafeAllocaNodes.begin(), unsafeAllocaNodes.end(), DSN);
          if (NodeI != unsafeAllocaNodes.end()) {
            unsafeAllocaNodes.erase(NodeI);
          }

          //
          // This alloca needs to be changed to a malloc.  Add it to the
          // worklist.
          //
          Worklist.push_back(AI);
        }
      }
    }

    //
    // Update the statistics.
    //
    if (Worklist.size())
      ConvAllocas += Worklist.size();

    //
    // Convert everything in the worklist into a malloc instruction.
    //
    while (Worklist.size()) {
      //
      // Grab an alloca from the worklist.
      //
      AllocaInst *AI = Worklist.back();
      Worklist.pop_back();

      //
      // Get the DSNode for this alloca.
      //
      DSNode *DSN = DSG->getNodeForValue(AI).getNode();
      assert(DSN && "No DSNode for alloca!\n");

      //
      // Promote the alloca and remove it from the program.
      //
      promoteAlloca(AI, DSN);
      AI->getParent()->getInstList().erase(AI);
    }
  }
}
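// The promoteAlloca() call above performs the actual rewrite; its effect on
// the IR is roughly the following sketch, where N stands for the allocation
// size of %struct.T under the target's DataLayout:
//
//   %a = alloca %struct.T
//     -->  %m = call i8* @malloc(i64 N)
//          %a = bitcast i8* %m to %struct.T*
//
// Stack objects that the stack safety analysis flagged as unsafe thereby
// receive heap lifetime, at the cost of an allocation call.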
/// InlineCallIfPossible - If it is possible to inline the specified call
/// site, do so and update the CallGraph for this operation.
///
/// This function also does some basic book-keeping to update the IR.  The
/// InlinedArrayAllocas map keeps track of any allocas that are already
/// available from other functions inlined into the caller.  If we are able
/// to inline this call site we attempt to reuse already available allocas or
/// add any new allocas to the set if not possible.
static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
                                 InlinedArrayAllocasTy &InlinedArrayAllocas,
                                 int InlineHistory, bool InsertLifetime) {
  Function *Callee = CS.getCalledFunction();
  Function *Caller = CS.getCaller();

  // Try to inline the function.  Get the list of static allocas that were
  // inlined.
  if (!InlineFunction(CS, IFI, InsertLifetime))
    return false;

  // If the inlined function had a higher stack protection level than the
  // calling function, then bump up the caller's stack protection level.
  if (Callee->hasFnAttr(Attribute::StackProtectReq))
    Caller->addFnAttr(Attribute::StackProtectReq);
  else if (Callee->hasFnAttr(Attribute::StackProtect) &&
           !Caller->hasFnAttr(Attribute::StackProtectReq))
    Caller->addFnAttr(Attribute::StackProtect);

  // Look at all of the allocas that we inlined through this call site.  If
  // we have already inlined other allocas through other calls into this
  // function, then we know that they have disjoint lifetimes and that we can
  // merge them.
  //
  // There are many heuristics possible for merging these allocas, and the
  // different options have different tradeoffs.  One thing that we *really*
  // don't want to hurt is SRoA: once inlining happens, often allocas are no
  // longer address taken and so they can be promoted.
  //
  // Our "solution" for that is to only merge allocas whose outermost type is
  // an array type.  These are usually not promoted because someone is using
  // a variable index into them.  These are also often the most important
  // ones to merge.
  //
  // A better solution would be to have real memory lifetime markers in the
  // IR and not have the inliner do any merging of allocas at all.  This
  // would allow the backend to do proper stack slot coloring of all allocas
  // that *actually make it to the backend*, which is really what we want.
  //
  // Because we don't have this information, we do this simple and useful
  // hack.
  //
  SmallPtrSet<AllocaInst*, 16> UsedAllocas;

  // When processing our SCC, check to see if CS was inlined from some other
  // call site.  For example, if we're processing "A" in this code:
  //   A() { B() }
  //   B() { x = alloca ... C() }
  //   C() { y = alloca ... }
  // Assume that C was not inlined into B initially, and so we're processing
  // A and decide to inline B into A.  Doing this makes an alloca available
  // for reuse and makes a callsite (C) available for inlining.  When we
  // process the C call site we don't want to do any alloca merging between
  // X and Y because their scopes are not disjoint.  We could make this
  // smarter by keeping track of the inline history for each alloca in the
  // InlinedArrayAllocas but this isn't likely to be a significant win.
  if (InlineHistory != -1)  // Only do merging for top-level call sites in SCC.
    return true;

  // Loop over all the allocas we have so far and see if they can be merged
  // with a previously inlined alloca.  If not, remember that we had it.
  for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
       AllocaNo != e; ++AllocaNo) {
    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];

    // Don't bother trying to merge array allocations (they will usually be
    // canonicalized to be an allocation *of* an array), or allocations whose
    // type is not itself an array (because we're afraid of pessimizing
    // SRoA).
    ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
    if (ATy == 0 || AI->isArrayAllocation())
      continue;

    // Get the list of all available allocas for this array type.
    std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy];

    // Loop over the allocas in AllocasForType to see if we can reuse one.
    // Note that we have to be careful not to reuse the same "available"
    // alloca for multiple different allocas that we just inlined, we use the
    // 'UsedAllocas' set to keep track of which "available" allocas are being
    // used by this function.  Also, AllocasForType can be empty of course!
    bool MergedAwayAlloca = false;
    for (unsigned i = 0, e = AllocasForType.size(); i != e; ++i) {
      AllocaInst *AvailableAlloca = AllocasForType[i];

      // The available alloca has to be in the right function, not in some
      // other function in this SCC.
      if (AvailableAlloca->getParent() != AI->getParent())
        continue;

      // If the inlined function already uses this alloca then we can't reuse
      // it.
      if (!UsedAllocas.insert(AvailableAlloca))
        continue;

      // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and
      // declare success!
      DEBUG(dbgs() << "    ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: "
                   << *AvailableAlloca << '\n');

      AI->replaceAllUsesWith(AvailableAlloca);
      AI->eraseFromParent();
      MergedAwayAlloca = true;
      ++NumMergedAllocas;
      IFI.StaticAllocas[AllocaNo] = 0;
      break;
    }

    // If we already nuked the alloca, we're done with it.
    if (MergedAwayAlloca)
      continue;

    // If we were unable to merge away the alloca either because there are no
    // allocas of the right type available or because we reused them all
    // already, remember that this alloca came from an inlined function and
    // mark it used so we don't reuse it for other allocas from this inline
    // operation.
    AllocasForType.push_back(AI);
    UsedAllocas.insert(AI);
  }

  return true;
}
/// Look at all of the allocas that we inlined through this call site.  If we
/// have already inlined other allocas through other calls into this
/// function, then we know that they have disjoint lifetimes and that we can
/// merge them.
///
/// There are many heuristics possible for merging these allocas, and the
/// different options have different tradeoffs.  One thing that we *really*
/// don't want to hurt is SRoA: once inlining happens, often allocas are no
/// longer address taken and so they can be promoted.
///
/// Our "solution" for that is to only merge allocas whose outermost type is
/// an array type.  These are usually not promoted because someone is using a
/// variable index into them.  These are also often the most important ones
/// to merge.
///
/// A better solution would be to have real memory lifetime markers in the IR
/// and not have the inliner do any merging of allocas at all.  This would
/// allow the backend to do proper stack slot coloring of all allocas that
/// *actually make it to the backend*, which is really what we want.
///
/// Because we don't have this information, we do this simple and useful
/// hack.
static void mergeInlinedArrayAllocas(
    Function *Caller, InlineFunctionInfo &IFI,
    InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory) {
  SmallPtrSet<AllocaInst *, 16> UsedAllocas;

  // When processing our SCC, check to see if CS was inlined from some other
  // call site.  For example, if we're processing "A" in this code:
  //   A() { B() }
  //   B() { x = alloca ... C() }
  //   C() { y = alloca ... }
  // Assume that C was not inlined into B initially, and so we're processing
  // A and decide to inline B into A.  Doing this makes an alloca available
  // for reuse and makes a callsite (C) available for inlining.  When we
  // process the C call site we don't want to do any alloca merging between
  // X and Y because their scopes are not disjoint.  We could make this
  // smarter by keeping track of the inline history for each alloca in the
  // InlinedArrayAllocas but this isn't likely to be a significant win.
  if (InlineHistory != -1)  // Only do merging for top-level call sites in SCC.
    return;

  // Loop over all the allocas we have so far and see if they can be merged
  // with a previously inlined alloca.  If not, remember that we had it.
  for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
       AllocaNo != e; ++AllocaNo) {
    AllocaInst *AI = IFI.StaticAllocas[AllocaNo];

    // Don't bother trying to merge array allocations (they will usually be
    // canonicalized to be an allocation *of* an array), or allocations whose
    // type is not itself an array (because we're afraid of pessimizing
    // SRoA).
    ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
    if (!ATy || AI->isArrayAllocation())
      continue;

    // Get the list of all available allocas for this array type.
    std::vector<AllocaInst *> &AllocasForType = InlinedArrayAllocas[ATy];

    // Loop over the allocas in AllocasForType to see if we can reuse one.
    // Note that we have to be careful not to reuse the same "available"
    // alloca for multiple different allocas that we just inlined, we use the
    // 'UsedAllocas' set to keep track of which "available" allocas are being
    // used by this function.  Also, AllocasForType can be empty of course!
    bool MergedAwayAlloca = false;
    for (AllocaInst *AvailableAlloca : AllocasForType) {
      unsigned Align1 = AI->getAlignment(),
               Align2 = AvailableAlloca->getAlignment();

      // The available alloca has to be in the right function, not in some
      // other function in this SCC.
      if (AvailableAlloca->getParent() != AI->getParent())
        continue;

      // If the inlined function already uses this alloca then we can't reuse
      // it.
      if (!UsedAllocas.insert(AvailableAlloca).second)
        continue;

      // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and
      // declare success!
      DEBUG(dbgs() << "    ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: "
                   << *AvailableAlloca << '\n');

      // Move affected dbg.declare calls immediately after the new alloca to
      // avoid the situation when a dbg.declare precedes its alloca.
      if (auto *L = LocalAsMetadata::getIfExists(AI))
        if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
          for (User *U : MDV->users())
            if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
              DDI->moveBefore(AvailableAlloca->getNextNode());

      AI->replaceAllUsesWith(AvailableAlloca);

      if (Align1 != Align2) {
        if (!Align1 || !Align2) {
          const DataLayout &DL = Caller->getParent()->getDataLayout();
          unsigned TypeAlign = DL.getABITypeAlignment(AI->getAllocatedType());

          Align1 = Align1 ? Align1 : TypeAlign;
          Align2 = Align2 ? Align2 : TypeAlign;
        }

        if (Align1 > Align2)
          AvailableAlloca->setAlignment(AI->getAlignment());
      }

      AI->eraseFromParent();
      MergedAwayAlloca = true;
      ++NumMergedAllocas;
      IFI.StaticAllocas[AllocaNo] = nullptr;
      break;
    }

    // If we already nuked the alloca, we're done with it.
    if (MergedAwayAlloca)
      continue;

    // If we were unable to merge away the alloca either because there are no
    // allocas of the right type available or because we reused them all
    // already, remember that this alloca came from an inlined function and
    // mark it used so we don't reuse it for other allocas from this inline
    // operation.
    AllocasForType.push_back(AI);
    UsedAllocas.insert(AI);
  }
}
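// A concrete sketch of the merge: if two inlined callees each declared a
// local "char buf[256]" and both were inlined at the top level of the same
// caller, the caller briefly contains (names illustrative)
//
//   %buf.i  = alloca [256 x i8], align 16
//   %buf.i2 = alloca [256 x i8], align 1
//
// and the loop above RAUWs %buf.i2 into %buf.i.  Because the two inlined
// lifetimes are disjoint, sharing one slot is safe and halves the stack
// space; the surviving alloca keeps the larger of the two alignments.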