/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select
/// instruction.
bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
  SelectInst *SI = cast<SelectInst>(I.getOperand(1));

  // div/rem X, (Cond ? 0 : Y) -> div/rem X, Y
  int NonNullOperand = -1;
  if (Constant *ST = dyn_cast<Constant>(SI->getOperand(1)))
    if (ST->isNullValue())
      NonNullOperand = 2;
  // div/rem X, (Cond ? Y : 0) -> div/rem X, Y
  if (Constant *ST = dyn_cast<Constant>(SI->getOperand(2)))
    if (ST->isNullValue())
      NonNullOperand = 1;

  if (NonNullOperand == -1)
    return false;

  Value *SelectCond = SI->getOperand(0);

  // Change the div/rem to use 'Y' instead of the select.
  I.setOperand(1, SI->getOperand(NonNullOperand));

  // Okay, we know we can replace the operand of the div/rem with 'Y' with no
  // problem.  However, the select, or the condition of the select, may have
  // multiple uses.  Based on our knowledge that the operand must be non-zero,
  // propagate the known value for the select into other uses of it, and
  // propagate a known value of the condition into its other users.

  // If the select and its condition have no other uses, don't bother with
  // this; exit early.
  if (SI->use_empty() && SelectCond->hasOneUse())
    return true;

  // Scan the current block backward, looking for other uses of SI.
  BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin();

  while (BBI != BBFront) {
    --BBI;
    // If we found a call to a function, we can't assume it will return, so
    // information from below it cannot be propagated above it.
    if (isa<CallInst>(BBI) && !isa<IntrinsicInst>(BBI))
      break;

    // Replace uses of the select or its condition with the known values.
    for (Instruction::op_iterator I = BBI->op_begin(), E = BBI->op_end();
         I != E; ++I) {
      if (*I == SI) {
        *I = SI->getOperand(NonNullOperand);
        Worklist.Add(BBI);
      } else if (*I == SelectCond) {
        *I = NonNullOperand == 1 ? ConstantInt::getTrue(BBI->getContext()) :
                                   ConstantInt::getFalse(BBI->getContext());
        Worklist.Add(BBI);
      }
    }

    // If we passed the instruction, quit looking for it.
    if (&*BBI == SI)
      SI = 0;
    if (&*BBI == SelectCond)
      SelectCond = 0;

    // If we ran out of things to eliminate, break out of the loop.
    if (SelectCond == 0 && SI == 0)
      break;
  }
  return true;
}
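// Illustrative sketch of the fold above (made-up IR value names, not taken
// from the original source):
//
//   %sel = select i1 %c, i32 0, i32 %y
//   %q   = sdiv i32 %x, %sel
//
// becomes
//
//   %q   = sdiv i32 %x, %y
//
// and, scanning backwards through the same block, other uses of %sel are
// rewritten to %y and other uses of %c to the constant that selects the
// non-zero arm (false here), since any non-trapping execution of the divide
// implies the zero arm was not chosen.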
/// SimplifyStoreAtEndOfBlock - Turn things like: /// if () { *P = v1; } else { *P = v2 } /// into a phi node with a store in the successor. /// /// Simplify things like: /// *P = v1; if () { *P = v2; } /// into a phi node with a store in the successor. /// bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { BasicBlock *StoreBB = SI.getParent(); // Check to see if the successor block has exactly two incoming edges. If // so, see if the other predecessor contains a store to the same location. // if so, insert a PHI node (if needed) and move the stores down. BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0); // Determine whether Dest has exactly two predecessors and, if so, compute // the other predecessor. pred_iterator PI = pred_begin(DestBB); BasicBlock *P = *PI; BasicBlock *OtherBB = nullptr; if (P != StoreBB) OtherBB = P; if (++PI == pred_end(DestBB)) return false; P = *PI; if (P != StoreBB) { if (OtherBB) return false; OtherBB = P; } if (++PI != pred_end(DestBB)) return false; // Bail out if all the relevant blocks aren't distinct (this can happen, // for example, if SI is in an infinite loop) if (StoreBB == DestBB || OtherBB == DestBB) return false; // Verify that the other block ends in a branch and is not otherwise empty. BasicBlock::iterator BBI(OtherBB->getTerminator()); BranchInst *OtherBr = dyn_cast<BranchInst>(BBI); if (!OtherBr || BBI == OtherBB->begin()) return false; // If the other block ends in an unconditional branch, check for the 'if then // else' case. there is an instruction before the branch. StoreInst *OtherStore = nullptr; if (OtherBr->isUnconditional()) { --BBI; // Skip over debugging info. while (isa<DbgInfoIntrinsic>(BBI) || (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { if (BBI==OtherBB->begin()) return false; --BBI; } // If this isn't a store, isn't a store to the same location, or is not the // right kind of store, bail out. OtherStore = dyn_cast<StoreInst>(BBI); if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) || !SI.isSameOperationAs(OtherStore)) return false; } else { // Otherwise, the other block ended with a conditional branch. If one of the // destinations is StoreBB, then we have the if/then case. if (OtherBr->getSuccessor(0) != StoreBB && OtherBr->getSuccessor(1) != StoreBB) return false; // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an // if/then triangle. See if there is a store to the same ptr as SI that // lives in OtherBB. for (;; --BBI) { // Check to see if we find the matching store. if ((OtherStore = dyn_cast<StoreInst>(BBI))) { if (OtherStore->getOperand(1) != SI.getOperand(1) || !SI.isSameOperationAs(OtherStore)) return false; break; } // If we find something that may be using or overwriting the stored // value, or if we run out of instructions, we can't do the xform. if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() || BBI == OtherBB->begin()) return false; } // In order to eliminate the store in OtherBr, we have to // make sure nothing reads or overwrites the stored value in // StoreBB. for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) { // FIXME: This should really be AA driven. if (I->mayReadFromMemory() || I->mayWriteToMemory()) return false; } } // Insert a PHI node now if we need it. 
Value *MergedVal = OtherStore->getOperand(0); if (MergedVal != SI.getOperand(0)) { PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge"); PN->addIncoming(SI.getOperand(0), SI.getParent()); PN->addIncoming(OtherStore->getOperand(0), OtherBB); MergedVal = InsertNewInstBefore(PN, DestBB->front()); } // Advance to a place where it is safe to insert the new store and // insert it. BBI = DestBB->getFirstInsertionPt(); StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1), SI.isVolatile(), SI.getAlignment(), SI.getOrdering(), SI.getSynchScope()); InsertNewInstBefore(NewSI, *BBI); NewSI->setDebugLoc(OtherStore->getDebugLoc()); // If the two stores had AA tags, merge them. AAMDNodes AATags; SI.getAAMetadata(AATags); if (AATags) { OtherStore->getAAMetadata(AATags, /* Merge = */ true); NewSI->setAAMetadata(AATags); } // Nuke the old stores. EraseInstFromFunction(SI); EraseInstFromFunction(*OtherStore); return true; }
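// Illustrative sketch of the merged form (assumed block/value names):
//
//   then:                                   else:
//     store i32 %v1, i32* %p                  store i32 %v2, i32* %p
//     br label %merge                         br label %merge
//
// becomes, in %merge,
//
//   %storemerge = phi i32 [ %v1, %then ], [ %v2, %else ]
//   store i32 %storemerge, i32* %p
//
// with both original stores erased (a "storemerge" PHI is only created when
// the two stored values differ).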
void ArrayObfs::ArrObfuscate ( Function *F ) { // Iterate the whole Function Function *f = F; for ( Function::iterator bb = f->begin(); bb != f->end(); ++bb ) { for ( BasicBlock::iterator inst = bb->begin(); inst != bb->end(); ) { if ( inst->getOpcode() == 29 ) // getelementptr { //errs() << "INST : " << *inst << "\n"; GetElementPtrInst *Ary = dyn_cast<GetElementPtrInst>(&*inst); Value *ptrVal = Ary->getOperand(0); Type *type = ptrVal->getType(); unsigned numOfOprand = Ary->getNumOperands(); unsigned lastOprand = numOfOprand - 1; // Check Type Array if ( PointerType *ptrType = dyn_cast<PointerType>( type ) ) { Type *elementType = ptrType->getElementType(); if ( elementType->isArrayTy() ) { // Skip if Index is a Variable if ( dyn_cast<ConstantInt>( Ary->getOperand( lastOprand ) ) ) { ////////////////////////////////////////////////////////////////////////////// // Do Real Stuff Value *oprand = Ary->getOperand( lastOprand ); Value *basePtr = Ary->getOperand( 0 ); APInt offset = dyn_cast<ConstantInt>(oprand)->getValue(); Value *prevPtr = basePtr; // Enter a Loop to Perform Random Obfuscation unsigned cnt = 100; // Prelog : Clone the Original Inst unsigned ObfsIdx = cryptoutils->get_uint64_t() & 0xffff; Value *newOprand = ConstantInt::get( oprand->getType(), ObfsIdx ); Instruction *gep = inst->clone(); gep->setOperand( lastOprand, newOprand ); gep->setOperand( 0, prevPtr ); gep->insertBefore( inst ); prevPtr = gep; offset = offset - ObfsIdx; // Create a Global Variable to Avoid Optimization Module *M = f->getParent(); Constant *initGV = ConstantInt::get( prevPtr->getType(), 0 ); GlobalVariable *gv = new GlobalVariable( *M, prevPtr->getType(), false, GlobalValue::CommonLinkage, initGV ); while ( cnt-- ) { // Iteratively Generate Obfuscated Code switch( cryptoutils->get_uint64_t() & 7 ) { // Random Indexing Obfuscation case 0 : case 1 : case 2 : { //errs() << "=> Random Index \n"; // Create New Instruction // Create Obfuscated New Oprand in ConstantInt Type unsigned ObfsIdx = cryptoutils->get_uint64_t() & 0xffff; Value *newOprand = ConstantInt::get( oprand->getType(), ObfsIdx ); // Create GetElementPtrInst Instruction GetElementPtrInst *gep = GetElementPtrInst::Create( prevPtr, newOprand, "", inst ); //Set prevPtr prevPtr = gep; //errs() << "Created : " << *prevPtr << "\n"; offset = offset - ObfsIdx; break; } // Ptr Dereference case 3 : case 4 : { //errs() << "=> Ptr Dereference \n"; Module *M = f->getParent(); Value *ONE = ConstantInt::get( Type::getInt32Ty( M->getContext() ), 1 ); Value *tmp = new AllocaInst( prevPtr->getType(), ONE, "", inst ); new StoreInst( prevPtr, tmp, inst ); prevPtr = new LoadInst( tmp, "", inst ); break; } // Ptr Value Transform case 5 : case 6 : case 7 : { //errs() << "=> Ptr Value Trans \n"; unsigned RandNum = cryptoutils->get_uint64_t(); Value *ObfsVal = ConstantInt::get( prevPtr->getType(), RandNum ); BinaryOperator *op = BinaryOperator::Create( Instruction::FAdd, prevPtr, ObfsVal, "", inst ); new StoreInst( prevPtr, gv, inst ); BinaryOperator::Create( Instruction::FSub, gv, ObfsVal, "", inst ); prevPtr = new LoadInst( gv, "", inst ); break; } } } // Postlog : Fix the Original Indexing { Value *fixOprand = ConstantInt::get( oprand->getType(), offset ); // Refine the Last Instruction GetElementPtrInst *gep = GetElementPtrInst::Create( prevPtr, fixOprand, "", inst ); // Fix the Relationship inst->replaceAllUsesWith( gep ); // Finally : Unlink This Instruction From Parent Instruction *DI = inst++; //errs() << "user_back : " << *(DI->user_back()) << "\n"; 
DI->removeFromParent(); } ////////////////////////////////////////////////////////////////////////////// // End : Variable Index } else { inst++; } // End : Check Array Type } else { inst++; } // End : Check Pointer Type } else { inst++; } // End : Check Opcode GetElementPtr } else { inst++; } } } ++ArrayMod; }
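// Illustrative sketch of the obfuscated access chain (all indices made up):
// a constant-index array access such as
//
//   %p = getelementptr [16 x i32], [16 x i32]* %a, i32 0, i32 9
//
// is replaced by a cloned GEP with a random index, followed by up to 100
// randomly chosen steps (extra GEPs with random indices, or spills/reloads of
// the intermediate pointer through an alloca or the helper global), and a
// final GEP whose index is the accumulated correction (9 minus the sum of the
// random indices), so the chain still yields the original address while no
// single instruction carries the original constant.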
bool GenericToNVVM::runOnModule(Module &M) {
  // Create a clone of each global variable that has the default address space.
  // The clone is created with the global address space specifier, and the pair
  // of original global variable and its clone is placed in the GVMap for later
  // use.
  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
       I != E;) {
    GlobalVariable *GV = &*I++;
    if (GV->getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC &&
        !llvm::isTexture(*GV) && !llvm::isSurface(*GV) &&
        !llvm::isSampler(*GV) && !GV->getName().startswith("llvm.")) {
      GlobalVariable *NewGV = new GlobalVariable(
          M, GV->getValueType(), GV->isConstant(), GV->getLinkage(),
          GV->hasInitializer() ? GV->getInitializer() : nullptr, "", GV,
          GV->getThreadLocalMode(), llvm::ADDRESS_SPACE_GLOBAL);
      NewGV->copyAttributesFrom(GV);
      GVMap[GV] = NewGV;
    }
  }

  // Return immediately, if every global variable has a specific address space
  // specifier.
  if (GVMap.empty()) {
    return false;
  }

  // Walk through the instructions in function definitions, and replace any use
  // of original global variables in GVMap with a use of the corresponding
  // copies in GVMap.  If necessary, promote constants to instructions.
  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
    if (I->isDeclaration()) {
      continue;
    }
    IRBuilder<> Builder(I->getEntryBlock().getFirstNonPHIOrDbg());
    for (Function::iterator BBI = I->begin(), BBE = I->end(); BBI != BBE;
         ++BBI) {
      for (BasicBlock::iterator II = BBI->begin(), IE = BBI->end(); II != IE;
           ++II) {
        for (unsigned i = 0, e = II->getNumOperands(); i < e; ++i) {
          Value *Operand = II->getOperand(i);
          if (isa<Constant>(Operand)) {
            II->setOperand(
                i, remapConstant(&M, &*I, cast<Constant>(Operand), Builder));
          }
        }
      }
    }
    ConstantToValueMap.clear();
  }

  // Copy GVMap over to a standard value map.
  ValueToValueMapTy VM;
  for (auto I = GVMap.begin(), E = GVMap.end(); I != E; ++I)
    VM[I->first] = I->second;

  // Walk through the metadata section and update the debug information
  // associated with the global variables in the default address space.
  for (NamedMDNode &I : M.named_metadata()) {
    remapNamedMDNode(VM, &I);
  }

  // Walk through the global variable initializers, and replace any use of
  // original global variables in GVMap with a use of the corresponding copies
  // in GVMap.  The copies need to be bitcast to the original global variable
  // types, as we cannot use cvta in global variable initializers.
  for (GVMapTy::iterator I = GVMap.begin(), E = GVMap.end(); I != E;) {
    GlobalVariable *GV = I->first;
    GlobalVariable *NewGV = I->second;

    // Remove GV from the map so that it can be RAUWed.  Note that
    // DenseMap::erase() won't invalidate any iterators but this one.
    auto Next = std::next(I);
    GVMap.erase(I);
    I = Next;

    Constant *BitCastNewGV = ConstantExpr::getPointerCast(NewGV, GV->getType());
    // At this point, the remaining uses of GV should be found only in global
    // variable initializers, as other uses have already been removed while
    // walking through the instructions in function definitions.
    GV->replaceAllUsesWith(BitCastNewGV);
    std::string Name = GV->getName();
    GV->eraseFromParent();
    NewGV->setName(Name);
  }
  assert(GVMap.empty() && "Expected it to be empty by now");

  return true;
}
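// Illustrative sketch (assumed names): a module-scope variable in the generic
// address space, e.g.
//
//   @g = internal global i32 0
//
// gets a clone in llvm::ADDRESS_SPACE_GLOBAL; uses inside function bodies are
// rewritten through remapConstant (typically reaching the clone via an
// address-space cast back to generic), uses inside other global initializers
// are replaced with ConstantExpr::getPointerCast of the clone, and finally the
// original is erased and the clone takes over its name.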
/// tryAggregating - When scanning forward over instructions, we look for /// other loads or stores that could be aggregated with this one. /// Returns the last instruction added (if one was added) since we might have /// removed some loads or stores and that might invalidate an iterator. Instruction *AggregateGlobalOpsOpt::tryAggregating(Instruction *StartInst, Value *StartPtr, bool DebugThis) { if (TD == 0) return 0; Module* M = StartInst->getParent()->getParent()->getParent(); LLVMContext& Context = StartInst->getContext(); Type* int8Ty = Type::getInt8Ty(Context); Type* sizeTy = Type::getInt64Ty(Context); Type* globalInt8PtrTy = int8Ty->getPointerTo(globalSpace); bool isLoad = isa<LoadInst>(StartInst); bool isStore = isa<StoreInst>(StartInst); Instruction *lastAddedInsn = NULL; Instruction *LastLoadOrStore = NULL; SmallVector<Instruction*, 8> toRemove; // Okay, so we now have a single global load/store. Scan to find // all subsequent stores of the same value to offset from the same pointer. // Join these together into ranges, so we can decide whether contiguous blocks // are stored. MemOpRanges Ranges(*TD); // Put the first store in since we want to preserve the order. Ranges.addInst(0, StartInst); BasicBlock::iterator BI = StartInst; for (++BI; !isa<TerminatorInst>(BI); ++BI) { if( isGlobalLoadOrStore(BI, globalSpace, isLoad, isStore) ) { // OK! } else { // If the instruction is readnone, ignore it, otherwise bail out. We // don't even allow readonly here because we don't want something like: // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). if (BI->mayWriteToMemory()) break; if (isStore && BI->mayReadFromMemory()) break; continue; } if ( isStore && isa<StoreInst>(BI) ) { StoreInst *NextStore = cast<StoreInst>(BI); // If this is a store, see if we can merge it in. if (!NextStore->isSimple()) break; // Check to see if this store is to a constant offset from the start ptr. int64_t Offset; if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD)) break; Ranges.addStore(Offset, NextStore); LastLoadOrStore = NextStore; } else { LoadInst *NextLoad = cast<LoadInst>(BI); if (!NextLoad->isSimple()) break; // Check to see if this load is to a constant offset from the start ptr. int64_t Offset; if (!IsPointerOffset(StartPtr, NextLoad->getPointerOperand(), Offset, *TD)) break; Ranges.addLoad(Offset, NextLoad); LastLoadOrStore = NextLoad; } } // If we have no ranges, then we just had a single store with nothing that // could be merged in. This is a very common case of course. if (!Ranges.moreThanOneOp()) return 0; // Divide the instructions between StartInst and LastLoadOrStore into // addressing, memops, and uses of memops (uses of loads) reorderAddressingMemopsUses(StartInst, LastLoadOrStore, DebugThis); Instruction* insertBefore = StartInst; IRBuilder<> builder(insertBefore); // Now that we have full information about ranges, loop over the ranges and // emit memcpy's for anything big enough to be worthwhile. for (MemOpRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { const MemOpRange &Range = *I; Value* oldBaseI = NULL; Value* newBaseI = NULL; if (Range.TheStores.size() == 1) continue; // Don't bother if there's only one thing... builder.SetInsertPoint(insertBefore); // Otherwise, we do want to transform this! Create a new memcpy. // Get the starting pointer of the block. 
StartPtr = Range.StartPtr; if( DebugThis ) { errs() << "base is:"; StartPtr->dump(); } // Determine alignment unsigned Alignment = Range.Alignment; if (Alignment == 0) { Type *EltType = cast<PointerType>(StartPtr->getType())->getElementType(); Alignment = TD->getABITypeAlignment(EltType); } Instruction *alloc = NULL; Value *globalPtr = NULL; // create temporary alloca space to communicate to/from. alloc = makeAlloca(int8Ty, "agg.tmp", insertBefore, Range.End-Range.Start, Alignment); // Generate the old and new base pointers before we output // anything else. { Type* iPtrTy = TD->getIntPtrType(alloc->getType()); Type* iNewBaseTy = TD->getIntPtrType(alloc->getType()); oldBaseI = builder.CreatePtrToInt(StartPtr, iPtrTy, "agg.tmp.oldb.i"); newBaseI = builder.CreatePtrToInt(alloc, iNewBaseTy, "agg.tmp.newb.i"); } // If storing, do the stores we had into our alloca'd region. if( isStore ) { for (SmallVector<Instruction*, 16>::const_iterator SI = Range.TheStores.begin(), SE = Range.TheStores.end(); SI != SE; ++SI) { StoreInst* oldStore = cast<StoreInst>(*SI); if( DebugThis ) { errs() << "have store in range:"; oldStore->dump(); } Value* ptrToAlloc = rebasePointer(oldStore->getPointerOperand(), StartPtr, alloc, "agg.tmp", &builder, *TD, oldBaseI, newBaseI); // Old load must not be volatile or atomic... or we shouldn't have put // it in ranges assert(!(oldStore->isVolatile() || oldStore->isAtomic())); StoreInst* newStore = builder.CreateStore(oldStore->getValueOperand(), ptrToAlloc); newStore->setAlignment(oldStore->getAlignment()); newStore->takeName(oldStore); } } // cast the pointer that was load/stored to i8 if necessary. if( StartPtr->getType()->getPointerElementType() == int8Ty ) { globalPtr = StartPtr; } else { globalPtr = builder.CreatePointerCast(StartPtr, globalInt8PtrTy, "agg.cast"); } // Get a Constant* for the length. Constant* len = ConstantInt::get(sizeTy, Range.End-Range.Start, false); // Now add the memcpy instruction unsigned addrSpaceDst,addrSpaceSrc; addrSpaceDst = addrSpaceSrc = 0; if( isStore ) addrSpaceDst = globalSpace; if( isLoad ) addrSpaceSrc = globalSpace; Type *types[3]; types[0] = PointerType::get(int8Ty, addrSpaceDst); types[1] = PointerType::get(int8Ty, addrSpaceSrc); types[2] = sizeTy; Function *func = Intrinsic::getDeclaration(M, Intrinsic::memcpy, types); Value* args[5]; // dst src len alignment isvolatile if( isStore ) { // it's a store (ie put) args[0] = globalPtr; args[1] = alloc; } else { // it's a load (ie get) args[0] = alloc; args[1] = globalPtr; } args[2] = len; // alignment args[3] = ConstantInt::get(Type::getInt32Ty(Context), 0, false); // isvolatile args[4] = ConstantInt::get(Type::getInt1Ty(Context), 0, false); Instruction* aMemCpy = builder.CreateCall(func, args); /* DEBUG(dbgs() << "Replace ops:\n"; for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) dbgs() << *Range.TheStores[i] << '\n'; dbgs() << "With: " << *AMemSet << '\n'); */ if (!Range.TheStores.empty()) aMemCpy->setDebugLoc(Range.TheStores[0]->getDebugLoc()); lastAddedInsn = aMemCpy; // If loading, load from the memcpy'd region if( isLoad ) { for (SmallVector<Instruction*, 16>::const_iterator SI = Range.TheStores.begin(), SE = Range.TheStores.end(); SI != SE; ++SI) { LoadInst* oldLoad = cast<LoadInst>(*SI); if( DebugThis ) { errs() << "have load in range:"; oldLoad->dump(); } Value* ptrToAlloc = rebasePointer(oldLoad->getPointerOperand(), StartPtr, alloc, "agg.tmp", &builder, *TD, oldBaseI, newBaseI); // Old load must not be volatile or atomic... 
or we shouldn't have put // it in ranges assert(!(oldLoad->isVolatile() || oldLoad->isAtomic())); LoadInst* newLoad = builder.CreateLoad(ptrToAlloc); newLoad->setAlignment(oldLoad->getAlignment()); oldLoad->replaceAllUsesWith(newLoad); newLoad->takeName(oldLoad); lastAddedInsn = newLoad; } } // Save old loads/stores for removal for (SmallVector<Instruction*, 16>::const_iterator SI = Range.TheStores.begin(), SE = Range.TheStores.end(); SI != SE; ++SI) { Instruction* insn = *SI; toRemove.push_back(insn); } } // Zap all the old loads/stores for (SmallVector<Instruction*, 16>::const_iterator SI = toRemove.begin(), SE = toRemove.end(); SI != SE; ++SI) { (*SI)->eraseFromParent(); } return lastAddedInsn; }
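// Illustrative sketch of the rewrite (types, offsets and the address space
// number are assumptions): a run of adjacent simple stores through wide
// (global-space) pointers such as
//
//   store i32 %a, i32 addrspace(G)* %p          ; offset 0 from %p
//   store i32 %b, i32 addrspace(G)* %p.plus4    ; offset 4 from %p
//
// is staged through a local buffer and turned into one bulk copy:
//
//   %agg.tmp = alloca i8, i64 8
//   store i32 %a, <%agg.tmp rebased to offset 0>
//   store i32 %b, <%agg.tmp rebased to offset 4>
//   llvm.memcpy(dst = %p as i8 addrspace(G)*, src = %agg.tmp, len = 8, ...)
//
// Loads are handled symmetrically: memcpy from the global region into the
// alloca first, then the individual loads read from the alloca.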
/// runOnFunction - Insert code to maintain the shadow stack. bool ShadowStackGC::performCustomLowering(Function &F) { LLVMContext &Context = F.getContext(); // Find calls to llvm.gcroot. CollectRoots(F); // If there are no roots in this function, then there is no need to add a // stack map entry for it. if (Roots.empty()) return false; // Build the constant map and figure the type of the shadow stack entry. Value *FrameMap = GetFrameMap(F); Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F); // Build the shadow stack entry at the very start of the function. BasicBlock::iterator IP = F.getEntryBlock().begin(); IRBuilder<> AtEntry(IP->getParent(), IP); Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0, "gc_frame"); while (isa<AllocaInst>(IP)) ++IP; AtEntry.SetInsertPoint(IP->getParent(), IP); // Initialize the map pointer and load the current head of the shadow stack. Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry, 0,1,"gc_frame.map"); AtEntry.CreateStore(FrameMap, EntryMapPtr); // After all the allocas... for (unsigned I = 0, E = Roots.size(); I != E; ++I) { // For each root, find the corresponding slot in the aggregate... Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root"); // And use it in lieu of the alloca. AllocaInst *OriginalAlloca = Roots[I].second; SlotPtr->takeName(OriginalAlloca); OriginalAlloca->replaceAllUsesWith(SlotPtr); } // Move past the original stores inserted by GCStrategy::InitRoots. This isn't // really necessary (the collector would never see the intermediate state at // runtime), but it's nicer not to push the half-initialized entry onto the // shadow stack. while (isa<StoreInst>(IP)) ++IP; AtEntry.SetInsertPoint(IP->getParent(), IP); // Push the entry onto the shadow stack. Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, StackEntry,0,0,"gc_frame.next"); Instruction *NewHeadVal = CreateGEP(Context, AtEntry, StackEntry, 0, "gc_newhead"); AtEntry.CreateStore(CurrentHead, EntryNextPtr); AtEntry.CreateStore(NewHeadVal, Head); // For each instruction that escapes... EscapeEnumerator EE(F, "gc_cleanup"); while (IRBuilder<> *AtExit = EE.Next()) { // Pop the entry from the shadow stack. Don't reuse CurrentHead from // AtEntry, since that would make the value live for the entire function. Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, StackEntry, 0, 0, "gc_frame.next"); Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead"); AtExit->CreateStore(SavedHead, Head); } // Delete the original allocas (which are no longer used) and the intrinsic // calls (which are no longer valid). Doing this last avoids invalidating // iterators. for (unsigned I = 0, E = Roots.size(); I != E; ++I) { Roots[I].first->eraseFromParent(); Roots[I].second->eraseFromParent(); } Roots.clear(); return true; }
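// Illustrative sketch of the code this emits (frame field names are made up;
// "head" stands for the GC strategy's shadow-stack head global):
//
//   entry:
//     %gc_frame    = alloca { frame header, root slots... }
//     %gc_currhead = load <head>
//     store <FrameMap constant> into %gc_frame.map
//     store %gc_currhead        into %gc_frame.next
//     store %gc_frame           into <head>            ; push
//     ... gcroot allocas now refer to slots of %gc_frame ...
//
//   on every function exit:
//     %gc_savedhead = load %gc_frame.next
//     store %gc_savedhead into <head>                  ; pop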
Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { Value *Val = SI.getOperand(0); Value *Ptr = SI.getOperand(1); // If the RHS is an alloca with a single use, zapify the store, making the // alloca dead. // If the RHS is an alloca with a two uses, the other one being a // llvm.dbg.declare, zapify the store and the declare, making the // alloca dead. We must do this to prevent declares from affecting // codegen. if (!SI.isVolatile()) { if (Ptr->hasOneUse()) { if (isa<AllocaInst>(Ptr)) return EraseInstFromFunction(SI); if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { if (isa<AllocaInst>(GEP->getOperand(0))) { if (GEP->getOperand(0)->hasOneUse()) return EraseInstFromFunction(SI); if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) { EraseInstFromFunction(*DI); return EraseInstFromFunction(SI); } } } } if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) { EraseInstFromFunction(*DI); return EraseInstFromFunction(SI); } } // Attempt to improve the alignment. if (TD) { unsigned KnownAlign = GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType())); unsigned StoreAlign = SI.getAlignment(); unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign : TD->getABITypeAlignment(Val->getType()); if (KnownAlign > EffectiveStoreAlign) SI.setAlignment(KnownAlign); else if (StoreAlign == 0) SI.setAlignment(EffectiveStoreAlign); } // Do really simple DSE, to catch cases where there are several consecutive // stores to the same location, separated by a few arithmetic operations. This // situation often occurs with bitfield accesses. BasicBlock::iterator BBI = &SI; for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts; --ScanInsts) { --BBI; // Don't count debug info directives, lest they affect codegen, // and we skip pointer-to-pointer bitcasts, which are NOPs. if (isa<DbgInfoIntrinsic>(BBI) || (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { ScanInsts++; continue; } if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) { // Prev store isn't volatile, and stores to the same location? if (!PrevSI->isVolatile() &&equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1))) { ++NumDeadStore; ++BBI; EraseInstFromFunction(*PrevSI); continue; } break; } // If this is a load, we have to stop. However, if the loaded value is from // the pointer we're loading and is producing the pointer we're storing, // then *this* store is dead (X = load P; store X -> P). if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && !SI.isVolatile()) return EraseInstFromFunction(SI); // Otherwise, this is a load from some other location. Stores before it // may not be dead. break; } // Don't skip over loads or things that can modify memory. if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory()) break; } if (SI.isVolatile()) return 0; // Don't hack volatile stores. // store X, null -> turns into 'unreachable' in SimplifyCFG if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) { if (!isa<UndefValue>(Val)) { SI.setOperand(0, UndefValue::get(Val->getType())); if (Instruction *U = dyn_cast<Instruction>(Val)) Worklist.Add(U); // Dropped a use. } return 0; // Do not modify these! } // store undef, Ptr -> noop if (isa<UndefValue>(Val)) return EraseInstFromFunction(SI); // If the pointer destination is a cast, see if we can fold the cast into the // source instead. 
if (isa<CastInst>(Ptr)) if (Instruction *Res = InstCombineStoreToCast(*this, SI)) return Res; if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) if (CE->isCast()) if (Instruction *Res = InstCombineStoreToCast(*this, SI)) return Res; // If this store is the last instruction in the basic block (possibly // excepting debug info instructions), and if the block ends with an // unconditional branch, try to move it to the successor block. BBI = &SI; do { ++BBI; } while (isa<DbgInfoIntrinsic>(BBI) || (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())); if (BranchInst *BI = dyn_cast<BranchInst>(BBI)) if (BI->isUnconditional()) if (SimplifyStoreAtEndOfBlock(SI)) return 0; // xform done! return 0; }
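// Illustrative sketch (made-up IR) of the two local store folds above:
//
//   store i32 %a, i32* %p        ; dead: overwritten below with nothing that
//   %t = add i32 %x, 1           ;       reads or writes memory in between
//   store i32 %b, i32* %p
//
//   %v = load i32, i32* %p
//   store i32 %v, i32* %p        ; dead: stores back the value just loaded
//
// Both patterns are caught by the six-instruction backward scan in
// visitStoreInst without any alias analysis.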
void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, bool UseCalls) { // Don't insert coverage for unreachable blocks: we will never call // __sanitizer_cov() for them, so counting them in // NumberOfInstrumentedBlocks() might complicate calculation of code coverage // percentage. Also, unreachable instructions frequently have no debug // locations. if (isa<UnreachableInst>(BB.getTerminator())) return; BasicBlock::iterator IP = BB.getFirstInsertionPt(); bool IsEntryBB = &BB == &F.getEntryBlock(); DebugLoc EntryLoc; if (IsEntryBB) { if (auto SP = getDISubprogram(&F)) EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP); // Keep static allocas and llvm.localescape calls in the entry block. Even // if we aren't splitting the block, it's nice for allocas to be before // calls. IP = PrepareToSplitEntryBlock(BB, IP); } else { EntryLoc = IP->getDebugLoc(); } IRBuilder<> IRB(&*IP); IRB.SetCurrentDebugLocation(EntryLoc); Value *GuardP = IRB.CreateAdd( IRB.CreatePointerCast(GuardArray, IntptrTy), ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4)); Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy); if (Options.TracePC) { IRB.CreateCall(SanCovTracePC); } else if (Options.TraceBB) { IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP); } else if (UseCalls) { IRB.CreateCall(SanCovWithCheckFunction, GuardP); } else { LoadInst *Load = IRB.CreateLoad(GuardP); Load->setAtomic(Monotonic); Load->setAlignment(4); SetNoSanitizeMetadata(Load); Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load); Instruction *Ins = SplitBlockAndInsertIfThen( Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); IRB.SetInsertPoint(Ins); IRB.SetCurrentDebugLocation(EntryLoc); // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC. IRB.CreateCall(SanCovFunction, GuardP); IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. } if (Options.Use8bitCounters) { IRB.SetInsertPoint(&*IP); Value *P = IRB.CreateAdd( IRB.CreatePointerCast(EightBitCounterArray, IntptrTy), ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1)); P = IRB.CreateIntToPtr(P, IRB.getInt8PtrTy()); LoadInst *LI = IRB.CreateLoad(P); Value *Inc = IRB.CreateAdd(LI, ConstantInt::get(IRB.getInt8Ty(), 1)); StoreInst *SI = IRB.CreateStore(Inc, P); SetNoSanitizeMetadata(LI); SetNoSanitizeMetadata(SI); } }
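// Illustrative sketch of the default (no-calls, no-tracing) instrumentation
// that the code above inserts at a block's first insertion point; value names
// are made up:
//
//   %guardp = inttoptr (GuardArray + 4 * (1 + N)) to i32*
//   %guard  = load atomic i32, i32* %guardp monotonic, align 4
//   %cold   = icmp sge i32 0, %guard            ; guard not yet visited
//   br i1 %cold, label %instrument, label %cont ; weights 1:100000
// instrument:
//   call void @__sanitizer_cov(i32* %guardp)    ; SanCovFunction
//   call void asm "", ""()                      ; EmptyAsm, avoids merging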
/// SplitBlockPredecessors - This method transforms BB by introducing a new
/// basic block into the function, and moving some of the predecessors of BB to
/// be predecessors of the new block.  The new predecessors are indicated by the
/// Preds array, which has NumPreds elements in it.  The new block is given a
/// suffix of 'Suffix'.
///
/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
/// DominanceFrontier, LoopInfo, and LCSSA but no other analyses.
/// In particular, it does not preserve LoopSimplify (because it's
/// complicated to handle the case where one of the edges being split
/// is an exit of a loop with other exits).
///
BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
                                         BasicBlock *const *Preds,
                                         unsigned NumPreds, const char *Suffix,
                                         Pass *P) {
  // Create new basic block, insert right before the original block.
  BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
                                         BB->getParent(), BB);

  // The new block unconditionally branches to the old block.
  BranchInst *BI = BranchInst::Create(BB, NewBB);

  LoopInfo *LI = P ? P->getAnalysisIfAvailable<LoopInfo>() : 0;
  Loop *L = LI ? LI->getLoopFor(BB) : 0;
  // Guard against a null Pass here as well, matching the analysis lookups.
  bool PreserveLCSSA = P ? P->mustPreserveAnalysisID(LCSSAID) : false;

  // Move the edges from Preds to point to NewBB instead of BB.
  // While here, if we need to preserve loop analyses, collect
  // some information about how this split will affect loops.
  bool HasLoopExit = false;
  bool IsLoopEntry = !!L;
  bool SplitMakesNewLoopHeader = false;
  for (unsigned i = 0; i != NumPreds; ++i) {
    // This is slightly more strict than necessary; the minimum requirement
    // is that there be no more than one indirectbr branching to BB. And
    // all BlockAddress uses would need to be updated.
    assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
           "Cannot split an edge from an IndirectBrInst");

    Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);

    if (LI) {
      // If we need to preserve LCSSA, determine if any of
      // the preds is a loop exit.
      if (PreserveLCSSA)
        if (Loop *PL = LI->getLoopFor(Preds[i]))
          if (!PL->contains(BB))
            HasLoopExit = true;
      // If we need to preserve LoopInfo, note whether any of the
      // preds crosses an interesting loop boundary.
      if (L) {
        if (L->contains(Preds[i]))
          IsLoopEntry = false;
        else
          SplitMakesNewLoopHeader = true;
      }
    }
  }

  // Update dominator tree and dominator frontier if available.
  DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0;
  if (DT)
    DT->splitBlock(NewBB);
  if (DominanceFrontier *DF =
        P ? P->getAnalysisIfAvailable<DominanceFrontier>() : 0)
    DF->splitBlock(NewBB);

  // Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
  // node becomes an incoming value for BB's phi node.  However, if the Preds
  // list is empty, we need to insert dummy entries into the PHI nodes in BB to
  // account for the newly created predecessor.
  if (NumPreds == 0) {
    // Insert dummy values as the incoming value.
    for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
      cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
    return NewBB;
  }

  AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0;

  if (L) {
    if (IsLoopEntry) {
      // Add the new block to the nearest enclosing loop (and not an
      // adjacent loop).  To find this, examine each of the predecessors and
      // determine which loops enclose them, and select the most-nested loop
      // which contains the loop containing the block being split.
Loop *InnermostPredLoop = 0; for (unsigned i = 0; i != NumPreds; ++i) if (Loop *PredLoop = LI->getLoopFor(Preds[i])) { // Seek a loop which actually contains the block being split (to // avoid adjacent loops). while (PredLoop && !PredLoop->contains(BB)) PredLoop = PredLoop->getParentLoop(); // Select the most-nested of these loops which contains the block. if (PredLoop && PredLoop->contains(BB) && (!InnermostPredLoop || InnermostPredLoop->getLoopDepth() < PredLoop->getLoopDepth())) InnermostPredLoop = PredLoop; } if (InnermostPredLoop) InnermostPredLoop->addBasicBlockToLoop(NewBB, LI->getBase()); } else { L->addBasicBlockToLoop(NewBB, LI->getBase()); if (SplitMakesNewLoopHeader) L->moveToHeader(NewBB); } } // Otherwise, create a new PHI node in NewBB for each PHI node in BB. for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ) { PHINode *PN = cast<PHINode>(I++); // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node, unless it's needed for LCSSA. Value *InVal = 0; if (!HasLoopExit) { InVal = PN->getIncomingValueForBlock(Preds[0]); for (unsigned i = 1; i != NumPreds; ++i) if (InVal != PN->getIncomingValueForBlock(Preds[i])) { InVal = 0; break; } } if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. for (unsigned i = 0; i != NumPreds; ++i) PN->removeIncomingValue(Preds[i], false); } else { // If the values coming into the block are not the same, we need a PHI. // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = PHINode::Create(PN->getType(), PN->getName()+".ph", BI); if (AA) AA->copyValue(PN, NewPHI); // Move all of the PHI values for 'Preds' to the new PHI. for (unsigned i = 0; i != NumPreds; ++i) { Value *V = PN->removeIncomingValue(Preds[i], false); NewPHI->addIncoming(V, Preds[i]); } InVal = NewPHI; } // Add an incoming value to the PHI node in the loop for the preheader // edge. PN->addIncoming(InVal, NewBB); } return NewBB; }
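// Illustrative sketch (assumed names; the actual suffix is the Suffix
// argument): splitting predecessors {A, B} away from %bb, which also has a
// third predecessor C, yields
//
//   %bb.split:                              ; preceded by A and B only
//     %x.ph = phi i32 [ %xa, %A ], [ %xb, %B ]
//     br label %bb
//   %bb:                                    ; preceded by %bb.split and C
//     %x = phi i32 [ %x.ph, %bb.split ], [ %xc, %C ]
//
// When A and B would feed the same value, no ".ph" PHI is created and the
// entries are simply removed from %bb's PHI (unless needed for LCSSA).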
/// RemoveBlockIfDead - If the specified block is dead, remove it, update loop /// information, and remove any dead successors it has. /// void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB, std::vector<Instruction*> &Worklist, Loop *L) { if (pred_begin(BB) != pred_end(BB)) { // This block isn't dead, since an edge to BB was just removed, see if there // are any easy simplifications we can do now. if (BasicBlock *Pred = BB->getSinglePredecessor()) { // If it has one pred, fold phi nodes in BB. while (isa<PHINode>(BB->begin())) ReplaceUsesOfWith(BB->begin(), cast<PHINode>(BB->begin())->getIncomingValue(0), Worklist, L, LPM); // If this is the header of a loop and the only pred is the latch, we now // have an unreachable loop. if (Loop *L = LI->getLoopFor(BB)) if (loopHeader == BB && L->contains(Pred)) { // Remove the branch from the latch to the header block, this makes // the header dead, which will make the latch dead (because the header // dominates the latch). LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L); Pred->getTerminator()->eraseFromParent(); new UnreachableInst(BB->getContext(), Pred); // The loop is now broken, remove it from LI. RemoveLoopFromHierarchy(L); // Reprocess the header, which now IS dead. RemoveBlockIfDead(BB, Worklist, L); return; } // If pred ends in a uncond branch, add uncond branch to worklist so that // the two blocks will get merged. if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator())) if (BI->isUnconditional()) Worklist.push_back(BI); } return; } DEBUG(dbgs() << "Nuking dead block: " << *BB); // Remove the instructions in the basic block from the worklist. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { RemoveFromWorklist(I, Worklist); // Anything that uses the instructions in this basic block should have their // uses replaced with undefs. // If I is not void type then replaceAllUsesWith undef. // This allows ValueHandlers and custom metadata to adjust itself. if (!I->getType()->isVoidTy()) I->replaceAllUsesWith(UndefValue::get(I->getType())); } // If this is the edge to the header block for a loop, remove the loop and // promote all subloops. if (Loop *BBLoop = LI->getLoopFor(BB)) { if (BBLoop->getLoopLatch() == BB) { RemoveLoopFromHierarchy(BBLoop); if (currentLoop == BBLoop) { currentLoop = 0; redoLoop = false; } } } // Remove the block from the loop info, which removes it from any loops it // was in. LI->removeBlock(BB); // Remove phi node entries in successors for this block. TerminatorInst *TI = BB->getTerminator(); SmallVector<BasicBlock*, 4> Succs; for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { Succs.push_back(TI->getSuccessor(i)); TI->getSuccessor(i)->removePredecessor(BB); } // Unique the successors, remove anything with multiple uses. array_pod_sort(Succs.begin(), Succs.end()); Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end()); // Remove the basic block, including all of the instructions contained in it. LPM->deleteSimpleAnalysisValue(BB, L); BB->eraseFromParent(); // Remove successor blocks here that are not dead, so that we know we only // have dead blocks in this list. Nondead blocks have a way of becoming dead, // then getting removed before we revisit them, which is badness. // for (unsigned i = 0; i != Succs.size(); ++i) if (pred_begin(Succs[i]) != pred_end(Succs[i])) { // One exception is loop headers. If this block was the preheader for a // loop, then we DO want to visit the loop so the loop gets deleted. 
// We know that if the successor is a loop header, that this loop had to // be the preheader: the case where this was the latch block was handled // above and headers can only have two predecessors. if (!LI->isLoopHeader(Succs[i])) { Succs.erase(Succs.begin()+i); --i; } } for (unsigned i = 0, e = Succs.size(); i != e; ++i) RemoveBlockIfDead(Succs[i], Worklist, L); }
/// Determine whether the instructions in this range may be safely and cheaply /// speculated. This is not an important enough situation to develop complex /// heuristics. We handle a single arithmetic instruction along with any type /// conversions. static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, BasicBlock::iterator End, Loop *L) { bool seenIncrement = false; bool MultiExitLoop = false; if (!L->getExitingBlock()) MultiExitLoop = true; for (BasicBlock::iterator I = Begin; I != End; ++I) { if (!isSafeToSpeculativelyExecute(I)) return false; if (isa<DbgInfoIntrinsic>(I)) continue; switch (I->getOpcode()) { default: return false; case Instruction::GetElementPtr: // GEPs are cheap if all indices are constant. if (!cast<GEPOperator>(I)->hasAllConstantIndices()) return false; // fall-thru to increment case case Instruction::Add: case Instruction::Sub: case Instruction::And: case Instruction::Or: case Instruction::Xor: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: { Value *IVOpnd = !isa<Constant>(I->getOperand(0)) ? I->getOperand(0) : !isa<Constant>(I->getOperand(1)) ? I->getOperand(1) : nullptr; if (!IVOpnd) return false; // If increment operand is used outside of the loop, this speculation // could cause extra live range interference. if (MultiExitLoop) { for (User *UseI : IVOpnd->users()) { auto *UserInst = cast<Instruction>(UseI); if (!L->contains(UserInst)) return false; } } if (seenIncrement) return false; seenIncrement = true; break; } case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: // ignore type conversions break; } } return true; }
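// Illustrative sketch (made-up IR) of a range this accepts: a loop-header
// prefix consisting of one increment plus free conversions, e.g.
//
//   %iv.next = add i32 %iv, 1
//   %idx     = zext i32 %iv.next to i64
//   %p       = getelementptr i32, i32* %base, i64 0    ; constant indices
//
// A second arithmetic instruction, a variable-index GEP, or (in a multi-exit
// loop) an increment whose users escape the loop all cause the function to
// return false.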
/// IsTrivialUnswitchCondition - Check to see if this unswitch condition is
/// trivial: that is, that the condition controls whether or not the loop does
/// anything at all.  If this is a trivial condition, unswitching produces no
/// code duplication (equivalently, it produces a simpler loop and a new empty
/// loop, which gets deleted).
///
/// If this is a trivial condition, return true, otherwise return false.  When
/// returning true, this sets Cond and Val to the condition that controls the
/// trivial condition: when Cond dynamically equals Val, the loop is known to
/// exit.  Finally, this sets LoopExit to the BB that the loop exits to when
/// Cond == Val.
///
bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
                                              BasicBlock **LoopExit) {
  BasicBlock *Header = currentLoop->getHeader();
  TerminatorInst *HeaderTerm = Header->getTerminator();
  LLVMContext &Context = Header->getContext();

  BasicBlock *LoopExitBB = 0;
  if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
    // If the header block doesn't end with a conditional branch on Cond, we
    // can't handle it.
    if (!BI->isConditional() || BI->getCondition() != Cond)
      return false;

    // Check to see if a successor of the branch is guaranteed to
    // exit through a unique exit block without having any
    // side-effects.  If so, determine the value of Cond that causes it to do
    // this.
    if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
                                             BI->getSuccessor(0)))) {
      if (Val) *Val = ConstantInt::getTrue(Context);
    } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
                                                    BI->getSuccessor(1)))) {
      if (Val) *Val = ConstantInt::getFalse(Context);
    }
  } else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) {
    // If this isn't a switch on Cond, we can't handle it.
    if (SI->getCondition() != Cond) return false;

    // Check to see if a successor of the switch is guaranteed to go to the
    // latch block or exit through a single exit block without having any
    // side-effects.  If so, determine the value of Cond that causes it to do
    // this.
    // Note that we can't trivially unswitch on the default case or
    // on already unswitched cases.
    for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
         i != e; ++i) {
      BasicBlock* LoopExitCandidate;
      if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
                                                      i.getCaseSuccessor()))) {
        // Okay, we found a trivial case, remember the value that is trivial.
        ConstantInt* CaseVal = i.getCaseValue();

        // Check that it was not unswitched before, since already-unswitched
        // trivial values look trivial too.
        if (BranchesInfo.isUnswitched(SI, CaseVal))
          continue;
        LoopExitBB = LoopExitCandidate;
        if (Val) *Val = CaseVal;
        break;
      }
    }
  }

  // If we didn't find a single unique LoopExit block, or if the loop exit
  // block contains phi nodes, this isn't trivial.
  if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
    return false;   // Can't handle this.

  if (LoopExit) *LoopExit = LoopExitBB;

  // We already know that nothing uses any scalar values defined inside of this
  // loop.  As such, we just have to check to see if this loop will execute any
  // side-effecting instructions (e.g. stores, calls, volatile loads) in the
  // part of the loop that the code *would* execute.  We already checked the
  // tail, check the header now.
  for (BasicBlock::iterator I = Header->begin(), E = Header->end(); I != E; ++I)
    if (I->mayHaveSideEffects())
      return false;
  return true;
}
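// Illustrative sketch (assumed IR): a trivially unswitchable loop is one whose
// header does nothing but test the condition, e.g.
//
//   header:
//     br i1 %flag, label %exit, label %body
//
// When %flag is true the loop leaves immediately through %exit without side
// effects, so *Val is set to ConstantInt::getTrue(Context), *LoopExit to
// %exit, and unswitching only needs to hoist the test, duplicating no code.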
Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { Value *Val = SI.getOperand(0); Value *Ptr = SI.getOperand(1); // Try to canonicalize the stored type. if (combineStoreToValueType(*this, SI)) return EraseInstFromFunction(SI); // Attempt to improve the alignment. unsigned KnownAlign = getOrEnforceKnownAlignment( Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, AC, DT); unsigned StoreAlign = SI.getAlignment(); unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType()); if (KnownAlign > EffectiveStoreAlign) SI.setAlignment(KnownAlign); else if (StoreAlign == 0) SI.setAlignment(EffectiveStoreAlign); // Try to canonicalize the stored type. if (unpackStoreToAggregate(*this, SI)) return EraseInstFromFunction(SI); // Replace GEP indices if possible. if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) { Worklist.Add(NewGEPI); return &SI; } // Don't hack volatile/atomic stores. // FIXME: Some bits are legal for atomic stores; needs refactoring. if (!SI.isSimple()) return nullptr; // If the RHS is an alloca with a single use, zapify the store, making the // alloca dead. if (Ptr->hasOneUse()) { if (isa<AllocaInst>(Ptr)) return EraseInstFromFunction(SI); if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { if (isa<AllocaInst>(GEP->getOperand(0))) { if (GEP->getOperand(0)->hasOneUse()) return EraseInstFromFunction(SI); } } } // Do really simple DSE, to catch cases where there are several consecutive // stores to the same location, separated by a few arithmetic operations. This // situation often occurs with bitfield accesses. BasicBlock::iterator BBI = &SI; for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts; --ScanInsts) { --BBI; // Don't count debug info directives, lest they affect codegen, // and we skip pointer-to-pointer bitcasts, which are NOPs. if (isa<DbgInfoIntrinsic>(BBI) || (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) { ScanInsts++; continue; } if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) { // Prev store isn't volatile, and stores to the same location? if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1))) { ++NumDeadStore; ++BBI; EraseInstFromFunction(*PrevSI); continue; } break; } // If this is a load, we have to stop. However, if the loaded value is from // the pointer we're loading and is producing the pointer we're storing, // then *this* store is dead (X = load P; store X -> P). if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && LI->isSimple()) return EraseInstFromFunction(SI); // Otherwise, this is a load from some other location. Stores before it // may not be dead. break; } // Don't skip over loads or things that can modify memory. if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory()) break; } // store X, null -> turns into 'unreachable' in SimplifyCFG if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) { if (!isa<UndefValue>(Val)) { SI.setOperand(0, UndefValue::get(Val->getType())); if (Instruction *U = dyn_cast<Instruction>(Val)) Worklist.Add(U); // Dropped a use. } return nullptr; // Do not modify these! } // store undef, Ptr -> noop if (isa<UndefValue>(Val)) return EraseInstFromFunction(SI); // If this store is the last instruction in the basic block (possibly // excepting debug info instructions), and if the block ends with an // unconditional branch, try to move it to the successor block. 
BBI = &SI; do { ++BBI; } while (isa<DbgInfoIntrinsic>(BBI) || (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())); if (BranchInst *BI = dyn_cast<BranchInst>(BBI)) if (BI->isUnconditional()) if (SimplifyStoreAtEndOfBlock(SI)) return nullptr; // xform done! return nullptr; }
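// Illustrative sketch (made-up IR) of the two terminal folds in visitStoreInst
// above (also present in the older version of this function):
//
//   store i32 %v, i32* null     ; value operand replaced by undef here;
//                               ; SimplifyCFG later turns the block into
//                               ; 'unreachable'
//   store i32 undef, i32* %p    ; erased outright as a no-op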
bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) { if (CallInst *CI = dyn_cast<CallInst>(J)) { if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) { // Inline ASM is okay, unless it clobbers the ctr register. InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints(); for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) { InlineAsm::ConstraintInfo &C = CIV[i]; if (C.Type != InlineAsm::isInput) for (unsigned j = 0, je = C.Codes.size(); j < je; ++j) if (StringRef(C.Codes[j]).equals_lower("{ctr}")) return true; } continue; } if (!TM) return true; const TargetLowering *TLI = TM->getTargetLowering(); if (Function *F = CI->getCalledFunction()) { // Most intrinsics don't become function calls, but some might. // sin, cos, exp and log are always calls. unsigned Opcode; if (F->getIntrinsicID() != Intrinsic::not_intrinsic) { switch (F->getIntrinsicID()) { default: continue; // VisualStudio defines setjmp as _setjmp #if defined(_MSC_VER) && defined(setjmp) && \ !defined(setjmp_undefined_for_msvc) # pragma push_macro("setjmp") # undef setjmp # define setjmp_undefined_for_msvc #endif case Intrinsic::setjmp: #if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc) // let's return it to _setjmp state # pragma pop_macro("setjmp") # undef setjmp_undefined_for_msvc #endif case Intrinsic::longjmp: // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp // because, although it does clobber the counter register, the // control can't then return to inside the loop unless there is also // an eh_sjlj_setjmp. case Intrinsic::eh_sjlj_setjmp: case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: case Intrinsic::powi: case Intrinsic::log: case Intrinsic::log2: case Intrinsic::log10: case Intrinsic::exp: case Intrinsic::exp2: case Intrinsic::pow: case Intrinsic::sin: case Intrinsic::cos: return true; case Intrinsic::copysign: if (CI->getArgOperand(0)->getType()->getScalarType()-> isPPC_FP128Ty()) return true; else continue; // ISD::FCOPYSIGN is never a library call. case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; case Intrinsic::floor: Opcode = ISD::FFLOOR; break; case Intrinsic::ceil: Opcode = ISD::FCEIL; break; case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; case Intrinsic::round: Opcode = ISD::FROUND; break; } } // PowerPC does not use [US]DIVREM or other library calls for // operations on regular types which are not otherwise library calls // (i.e. soft float or atomics). If adapting for targets that do, // additional care is required here. LibFunc::Func Func; if (!F->hasLocalLinkage() && F->hasName() && LibInfo && LibInfo->getLibFunc(F->getName(), Func) && LibInfo->hasOptimizedCodeGen(Func)) { // Non-read-only functions are never treated as intrinsics. if (!CI->onlyReadsMemory()) return true; // Conversion happens only for FP calls. if (!CI->getArgOperand(0)->getType()->isFloatingPointTy()) return true; switch (Func) { default: return true; case LibFunc::copysign: case LibFunc::copysignf: continue; // ISD::FCOPYSIGN is never a library call. case LibFunc::copysignl: return true; case LibFunc::fabs: case LibFunc::fabsf: case LibFunc::fabsl: continue; // ISD::FABS is never a library call. 
case LibFunc::sqrt: case LibFunc::sqrtf: case LibFunc::sqrtl: Opcode = ISD::FSQRT; break; case LibFunc::floor: case LibFunc::floorf: case LibFunc::floorl: Opcode = ISD::FFLOOR; break; case LibFunc::nearbyint: case LibFunc::nearbyintf: case LibFunc::nearbyintl: Opcode = ISD::FNEARBYINT; break; case LibFunc::ceil: case LibFunc::ceilf: case LibFunc::ceill: Opcode = ISD::FCEIL; break; case LibFunc::rint: case LibFunc::rintf: case LibFunc::rintl: Opcode = ISD::FRINT; break; case LibFunc::round: case LibFunc::roundf: case LibFunc::roundl: Opcode = ISD::FROUND; break; case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl: Opcode = ISD::FTRUNC; break; } MVT VTy = TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true); if (VTy == MVT::Other) return true; if (TLI->isOperationLegalOrCustom(Opcode, VTy)) continue; else if (VTy.isVector() && TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType())) continue; return true; } } return true; } else if (isa<BinaryOperator>(J) && J->getType()->getScalarType()->isPPC_FP128Ty()) { // Most operations on ppc_f128 values become calls. return true; } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) || isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) { CastInst *CI = cast<CastInst>(J); if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() || CI->getDestTy()->getScalarType()->isPPC_FP128Ty() || isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) || isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType())) return true; } else if (isLargeIntegerTy(TT.isArch32Bit(), J->getType()->getScalarType()) && (J->getOpcode() == Instruction::UDiv || J->getOpcode() == Instruction::SDiv || J->getOpcode() == Instruction::URem || J->getOpcode() == Instruction::SRem)) { return true; } else if (TT.isArch32Bit() && isLargeIntegerTy(false, J->getType()->getScalarType()) && (J->getOpcode() == Instruction::Shl || J->getOpcode() == Instruction::AShr || J->getOpcode() == Instruction::LShr)) { // Only on PPC32, for 128-bit integers (specifically not 64-bit // integers), these might be runtime calls. return true; } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) { // On PowerPC, indirect jumps use the counter register. return true; } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) { if (!TM) return true; const TargetLowering *TLI = TM->getTargetLowering(); if (TLI->supportJumpTables() && SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries()) return true; } } return false; }
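// Illustrative sketch (C-level, assuming a 32-bit PowerPC target with IBM
// long double): loop bodies containing things like
//
//   long double z = x * y;           // ppc_fp128 arithmetic -> library call
//   long long   q = a128 / b128;     // >64-bit divide       -> runtime call
//   switch (n) { /* many cases */ }  // may lower to a jump table, i.e. an
//                                    // indirect branch through the CTR
//
// make mightUseCTR return true, since the resulting calls or indirect jumps
// clobber the counter register and rule out a CTR-based hardware loop.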
std::string getOperandName(BasicBlock::iterator inst) {
  return inst->getOperand(1)->getName().str();
}
void StatsTracker::computeReachableUncovered() { KModule *km = executor.kmodule; Module *m = km->module; static bool init = true; const InstructionInfoTable &infos = *km->infos; StatisticManager &sm = *theStatisticManager; if (init) { init = false; // Compute call targets. It would be nice to use alias information // instead of assuming all indirect calls hit all escaping // functions, eh? for (Module::iterator fnIt = m->begin(), fn_ie = m->end(); fnIt != fn_ie; ++fnIt) { for (Function::iterator bbIt = fnIt->begin(), bb_ie = fnIt->end(); bbIt != bb_ie; ++bbIt) { for (BasicBlock::iterator it = bbIt->begin(), ie = bbIt->end(); it != ie; ++it) { if (isa<CallInst>(it) || isa<InvokeInst>(it)) { if (isa<InlineAsm>(it->getOperand(0))) { // We can never call through here so assume no targets // (which should be correct anyhow). callTargets.insert(std::make_pair(it, std::vector<Function*>())); } else if (Function *target = getDirectCallTarget(it)) { callTargets[it].push_back(target); } else { callTargets[it] = std::vector<Function*>(km->escapingFunctions.begin(), km->escapingFunctions.end()); } } } } } // Compute function callers as reflexion of callTargets. for (calltargets_ty::iterator it = callTargets.begin(), ie = callTargets.end(); it != ie; ++it) for (std::vector<Function*>::iterator fit = it->second.begin(), fie = it->second.end(); fit != fie; ++fit) functionCallers[*fit].push_back(it->first); // Initialize minDistToReturn to shortest paths through // functions. 0 is unreachable. std::vector<Instruction *> instructions; for (Module::iterator fnIt = m->begin(), fn_ie = m->end(); fnIt != fn_ie; ++fnIt) { if (fnIt->isDeclaration()) { if (fnIt->doesNotReturn()) { functionShortestPath[fnIt] = 0; } else { functionShortestPath[fnIt] = 1; // whatever } continue; } else { functionShortestPath[fnIt] = 0; } KFunction *kf = km->functionMap[fnIt]; for (unsigned i = 0; i < kf->numInstructions; ++i) { Instruction *inst = kf->instrPostOrder[i]->inst; instructions.push_back(inst); sm.setIndexedValue(stats::minDistToReturn, kf->instrPostOrder[i]->info->id, isa<ReturnInst>(inst)); } } // I'm so lazy it's not even worklisted. bool changed; do { changed = false; for (std::vector<Instruction*>::iterator it = instructions.begin(), ie = instructions.end(); it != ie; ++it) { Instruction *inst = *it; unsigned bestThrough = 0; if (isa<CallInst>(inst) || isa<InvokeInst>(inst)) { std::vector<Function*> &targets = callTargets[inst]; for (std::vector<Function*>::iterator fnIt = targets.begin(), ie = targets.end(); fnIt != ie; ++fnIt) { uint64_t dist = functionShortestPath[*fnIt]; if (dist) { dist = 1+dist; // count instruction itself if (bestThrough==0 || dist<bestThrough) bestThrough = dist; } } } else { bestThrough = 1; } if (bestThrough) { unsigned id = infos.getInfo(*it).id; uint64_t best, cur = best = sm.getIndexedValue(stats::minDistToReturn, id); std::vector<Instruction*> succs = getSuccs(*it); for (std::vector<Instruction*>::iterator it2 = succs.begin(), ie = succs.end(); it2 != ie; ++it2) { uint64_t dist = sm.getIndexedValue(stats::minDistToReturn, infos.getInfo(*it2).id); if (dist) { uint64_t val = bestThrough + dist; if (best==0 || val<best) best = val; } } if (best != cur) { sm.setIndexedValue(stats::minDistToReturn, id, best); changed = true; // Update shortest path if this is the entry point. 
Function *f = inst->getParent()->getParent(); if (inst==f->begin()->begin()) functionShortestPath[f] = best; } } } } while (changed); } // compute minDistToUncovered, 0 is unreachable std::vector<Instruction *> instructions; std::vector<unsigned> ids; for (Module::iterator fnIt = m->begin(), fn_ie = m->end(); fnIt != fn_ie; ++fnIt) { if (fnIt->isDeclaration()) continue; KFunction *kf = km->functionMap[fnIt]; for (unsigned i = 0; i < kf->numInstructions; ++i) { Instruction *inst = kf->instrPostOrder[i]->inst; unsigned id = kf->instrPostOrder[i]->info->id; instructions.push_back(inst); ids.push_back(id); sm.setIndexedValue(stats::minDistToGloballyUncovered, id, sm.getIndexedValue(stats::globallyUncoveredInstructions, id)); } } // I'm so lazy it's not even worklisted. bool changed; do { changed = false; for (unsigned i = 0; i < instructions.size(); ++i) { Instruction *inst = instructions[i]; unsigned id = ids[i]; uint64_t best, cur = best = sm.getIndexedValue(stats::minDistToGloballyUncovered, id); unsigned bestThrough = 0; if (isa<CallInst>(inst) || isa<InvokeInst>(inst)) { std::vector<Function*> &targets = callTargets[inst]; for (std::vector<Function*>::iterator fnIt = targets.begin(), ie = targets.end(); fnIt != ie; ++fnIt) { uint64_t dist = functionShortestPath[*fnIt]; if (dist) { dist = 1+dist; // count instruction itself if (bestThrough==0 || dist<bestThrough) bestThrough = dist; } if (!(*fnIt)->isDeclaration()) { uint64_t calleeDist = sm.getIndexedValue(stats::minDistToGloballyUncovered, infos.getFunctionInfo(*fnIt).id); if (calleeDist) { calleeDist = 1+calleeDist; // count instruction itself if (best==0 || calleeDist<best) best = calleeDist; } } } } else { bestThrough = 1; } if (bestThrough) { std::vector<Instruction*> succs = getSuccs(inst); for (std::vector<Instruction*>::iterator it2 = succs.begin(), ie = succs.end(); it2 != ie; ++it2) { uint64_t dist = sm.getIndexedValue(stats::minDistToGloballyUncovered, infos.getInfo(*it2).id); if (dist) { uint64_t val = bestThrough + dist; if (best==0 || val<best) best = val; } } } if (best != cur) { sm.setIndexedValue(stats::minDistToGloballyUncovered, infos.getInfo(inst).id, best); changed = true; } } } while (changed); for (std::set<ExecutionState*>::iterator it = executor.states.begin(), ie = executor.states.end(); it != ie; ++it) { ExecutionState *es = *it; uint64_t currentFrameMinDist = 0; for (ExecutionState::stack_ty::iterator sfIt = es->stack().begin(), sf_ie = es->stack().end(); sfIt != sf_ie; ++sfIt) { ExecutionState::stack_ty::iterator next = sfIt + 1; KInstIterator kii; if (next==es->stack().end()) { kii = es->pc(); } else { kii = next->caller; ++kii; } sfIt->minDistToUncoveredOnReturn = currentFrameMinDist; currentFrameMinDist = computeMinDistToUncovered(kii, currentFrameMinDist); } } LOG(INFO) << "Processed " << instructions.size() << " instructions in static analysis"; }
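// Minimal sketch (hypothetical helper, not part of the tracker above) of the fixed-point
// relaxation both loops use: dist[i] is the minimum instruction count to reach a target,
// 0 means "cannot reach it", and every node pays a per-node cost before taking the cheapest
// successor distance.
#include <cstdint>
#include <vector>

void relaxMinDist(const std::vector<std::vector<int>> &succs,
                  std::vector<uint64_t> &dist) {
  bool changed;
  do {
    changed = false;
    for (size_t i = 0; i < succs.size(); ++i) {
      uint64_t best = dist[i];
      const uint64_t through = 1; // cost of the instruction itself
      for (int s : succs[i]) {
        if (dist[s] == 0) continue;       // successor cannot reach the target
        uint64_t val = through + dist[s];
        if (best == 0 || val < best) best = val;
      }
      if (best != dist[i]) { dist[i] = best; changed = true; }
    }
  } while (changed);
}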
/// processModule - Process entire module and collect debug info. void DebugInfoFinder::processModule(Module &M) { if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) { for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { DICompileUnit CU(CU_Nodes->getOperand(i)); addCompileUnit(CU); if (CU.getVersion() > LLVMDebugVersion10) { DIArray GVs = CU.getGlobalVariables(); for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) { DIGlobalVariable DIG(GVs.getElement(i)); if (addGlobalVariable(DIG)) processType(DIG.getType()); } DIArray SPs = CU.getSubprograms(); for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) processSubprogram(DISubprogram(SPs.getElement(i))); DIArray EnumTypes = CU.getEnumTypes(); for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) processType(DIType(EnumTypes.getElement(i))); DIArray RetainedTypes = CU.getRetainedTypes(); for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) processType(DIType(RetainedTypes.getElement(i))); return; } } } for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI) for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE; ++BI) { if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) processDeclare(DDI); DebugLoc Loc = BI->getDebugLoc(); if (Loc.isUnknown()) continue; LLVMContext &Ctx = BI->getContext(); DIDescriptor Scope(Loc.getScope(Ctx)); if (Scope.isCompileUnit()) addCompileUnit(DICompileUnit(Scope)); else if (Scope.isSubprogram()) processSubprogram(DISubprogram(Scope)); else if (Scope.isLexicalBlockFile()) { DILexicalBlockFile DBF = DILexicalBlockFile(Scope); processLexicalBlock(DILexicalBlock(DBF.getScope())); } else if (Scope.isLexicalBlock()) processLexicalBlock(DILexicalBlock(Scope)); if (MDNode *IA = Loc.getInlinedAt(Ctx)) processLocation(DILocation(IA)); } if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) { for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { DIGlobalVariable DIG(cast<MDNode>(NMD->getOperand(i))); if (addGlobalVariable(DIG)) { if (DIG.getVersion() <= LLVMDebugVersion10) addCompileUnit(DIG.getCompileUnit()); processType(DIG.getType()); } } } if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp")) for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) processSubprogram(DISubprogram(NMD->getOperand(i))); }
bool CopyMinimizationPass::runOnFunction(Function& f) { CurrentFile::set(__FILE__); bool changed = false ; if (f.isDeclaration() || f.getDFFunction() == NULL) { return changed ; } std::vector<Node<DFBasicBlock*>*> graph; //We need to start with an acyclic graph to do retiming on - //the analyzeCopyPass conveniently provides a function that returns the //blocks reachable by the sink. These are the same blocks that the //detectLoopsPass goes through, so they should be acyclical. std::map<DFBasicBlock*, bool> pipeBlocks = getPipelineBlocks(f); std::map<DFBasicBlock*,Node<DFBasicBlock*>*> nodeMap; //for each block in the DFG, create a Node for it. //this means we need to provide a weight for the block, //and we also want to save it in a map for later for(std::map<DFBasicBlock*, bool>::iterator BB = pipeBlocks.begin(); BB != pipeBlocks.end(); ++BB) { if( BB->second == false ) continue; Node<DFBasicBlock*>* node = new Node<DFBasicBlock*>(BB->first, BB->first->getPipelineLevel(), BB->first->getDelay()); nodeMap[BB->first] = node; graph.push_back(node); } //for each edge in the DFG, we need to create an Edge in the graph for(std::map<DFBasicBlock*, bool>::iterator BB = pipeBlocks.begin(); BB != pipeBlocks.end(); ++BB) { if( BB->second == false ) continue; CallInst* CI = dynamic_cast<CallInst*>(BB->first->getFirstNonPHI()); if( isROCCCFunctionCall(CI, ROCCCNames::LoadPrevious) ) continue; for(pred_iterator pred = pred_begin(BB->first); pred != pred_end(BB->first); ++pred) { assert( (*pred)->getDFBasicBlock() ); std::map<DFBasicBlock*,Node<DFBasicBlock*>*>::iterator predNode = nodeMap.find((*pred)->getDFBasicBlock()); if( predNode != nodeMap.end() ) { assert( nodeMap[BB->first] ); int weight = 0; std::map<llvm::Value*,bool> valsUsed; for(BasicBlock::iterator II = BB->first->begin(); II != BB->first->end(); ++II) { for(User::op_iterator OP = II->op_begin(); OP != II->op_end(); ++OP) { valsUsed[*OP] = true; } } for(BasicBlock::iterator PII = (*pred)->begin(); PII != (*pred)->end(); ++PII) { for(std::map<llvm::Value*,bool>::iterator VUI = valsUsed.begin(); VUI != valsUsed.end(); ++VUI) { if( isDefinition(PII, VUI->first) ) { weight += getSizeInBits(VUI->first); } } } predNode->second->flowsInto(*nodeMap[BB->first], weight); } else { INTERNAL_WARNING((*pred)->getName() << " was not found!\n"); } } } int minimum_copies = getTotalCopiedBits(graph); int num_original_copied_bits = minimum_copies; std::vector< Node<DFBasicBlock*>* > min_graph = createCopyOfGraph(graph); int iterationCount = 0; int minIterationCount = 0; LOG_MESSAGE2("Pipelining", "Register Minimization", "Bit registers needed to pipeline original graph, including both copies and pipeline boundaries: " << minimum_copies << ".\n"); while(maxTripCount(graph) < 10) { //cout << iterationCount << " - " << maxTripCount(graph) << "\n"; ++iterationCount; //display(graph); Edge< Node<DFBasicBlock*> >* largest = selectEdgeWithLargestWeight(graph); if( largest ) { //LOG_MESSAGE2("Pipelining", "Register Minimization", "Edge with most bit registers needed: " << largest->source->getData() << " -> " << largest->sink->getData() << "\n"); tightenEdge(largest); largest->tripCount++; } else { LOG_MESSAGE2("Pipelining", "Register Minimization", "Completely reduced.\n"); break; } //LOG_MESSAGE2("Pipelining", "Register Minimization", "Iteration " << iterationCount << " - total number of bits registers needed: " << getTotalCopiedBits(graph) << ".\n"); if( getTotalCopiedBits(graph) < minimum_copies ) { min_graph = createCopyOfGraph(graph); minimum_copies = 
getTotalCopiedBits(graph); ++minIterationCount; } } LOG_MESSAGE2("Pipelining", "Register Minimization", "Tested " << iterationCount << " total iterations of minimizing; minimum number of bit registers needed, " << minimum_copies << ", was found after " << minIterationCount << " iterations.\n"); //display(min_graph); if( minimum_copies != num_original_copied_bits ) { for(std::vector<Node<DFBasicBlock*>*>::const_iterator GI = graph.begin(); GI != graph.end(); ++GI) { int level = (*GI)->getPosition(); (*GI)->getData()->setPipelineLevel(level); (*GI)->getData()->setDataflowLevel(level); std::stringstream ss; ss << (*GI)->getData()->getName() << "_" << (*GI)->getData()->getPipelineLevel(); (*GI)->getData()->setName(ss.str()); } changed = true; } return changed ; }
/// When scanning forward over instructions, we look for some other patterns to /// fold away. In particular, this looks for stores to neighboring locations of /// memory. If it sees enough consecutive ones, it attempts to merge them /// together into a memcpy/memset. Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, Value *StartPtr, Value *ByteVal) { const DataLayout &DL = StartInst->getModule()->getDataLayout(); // Okay, so we now have a single store of a byte-splattable value. Scan to find // all subsequent stores of the same value to offsets from the same pointer. // Join these together into ranges, so we can decide whether contiguous blocks // are stored. MemsetRanges Ranges(DL); BasicBlock::iterator BI = StartInst; for (++BI; !isa<TerminatorInst>(BI); ++BI) { if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) { // If the instruction is readnone, ignore it, otherwise bail out. We // don't even allow readonly here because we don't want something like: // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). if (BI->mayWriteToMemory() || BI->mayReadFromMemory()) break; continue; } if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) { // If this is a store, see if we can merge it in. if (!NextStore->isSimple()) break; // Check to see if this stored value is of the same byte-splattable value. if (ByteVal != isBytewiseValue(NextStore->getOperand(0))) break; // Check to see if this store is to a constant offset from the start ptr. int64_t Offset; if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, DL)) break; Ranges.addStore(Offset, NextStore); } else { MemSetInst *MSI = cast<MemSetInst>(BI); if (MSI->isVolatile() || ByteVal != MSI->getValue() || !isa<ConstantInt>(MSI->getLength())) break; // Check to see if this store is to a constant offset from the start ptr. int64_t Offset; if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, DL)) break; Ranges.addMemSet(Offset, MSI); } } // If we have no ranges, then we just had a single store with nothing that // could be merged in. This is a very common case of course. if (Ranges.empty()) return nullptr; // If we had at least one store that could be merged in, add the starting // store as well. We try to avoid this unless there is at least something // interesting as a small compile-time optimization. Ranges.addInst(0, StartInst); // If we create any memsets, we put them right before the first instruction that // isn't part of the memset block. This ensures that the memset is dominated // by any addressing instruction needed by the start of the block. IRBuilder<> Builder(BI); // Now that we have full information about ranges, loop over the ranges and // emit memsets for anything big enough to be worthwhile. Instruction *AMemSet = nullptr; for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { const MemsetRange &Range = *I; if (Range.TheStores.size() == 1) continue; // If it is profitable to lower this range to memset, do so now. if (!Range.isProfitableToUseMemset(DL)) continue; // Otherwise, we do want to transform this! Create a new memset. // Get the starting pointer of the block. 
StartPtr = Range.StartPtr; // Determine alignment unsigned Alignment = Range.Alignment; if (Alignment == 0) { Type *EltType = cast<PointerType>(StartPtr->getType())->getElementType(); Alignment = DL.getABITypeAlignment(EltType); } AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment); DEBUG(dbgs() << "Replace stores:\n"; for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) dbgs() << *Range.TheStores[i] << '\n'; dbgs() << "With: " << *AMemSet << '\n'); if (!Range.TheStores.empty()) AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc()); // Zap all the stores. for (SmallVectorImpl<Instruction *>::const_iterator SI = Range.TheStores.begin(), SE = Range.TheStores.end(); SI != SE; ++SI) { MD->removeInstruction(*SI); (*SI)->eraseFromParent(); } ++NumMemSetInfer; } return AMemSet; }
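// Illustrative input (hypothetical, not from the source): a run of simple stores of the same
// byte-splattable value to adjacent offsets. The range collection above would cover bytes
// 0..7 and, when profitable, replace the eight stores with one memset.
void clear_header(unsigned char *p) {
  p[0] = 0; p[1] = 0; p[2] = 0; p[3] = 0;
  p[4] = 0; p[5] = 0; p[6] = 0; p[7] = 0;
  // after the transform, roughly: memset(p, 0, 8);
}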
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto, /// except that it does some simple constant prop and DCE on the fly. The /// effect of this is to copy significantly less code in cases where (for /// example) a function call with constant arguments is inlined, and those /// constant arguments cause a significant amount of code in the callee to be /// dead. Since this doesn't produce an exact copy of the input, it can't be /// used for things like CloneFunction or CloneModule. void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, DenseMap<const Value*, Value*> &ValueMap, SmallVectorImpl<ReturnInst*> &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, const TargetData *TD, Instruction *TheCall) { assert(NameSuffix && "NameSuffix cannot be null!"); #ifndef NDEBUG for (Function::const_arg_iterator II = OldFunc->arg_begin(), E = OldFunc->arg_end(); II != E; ++II) assert(ValueMap.count(II) && "No mapping from source argument specified!"); #endif PruningFunctionCloner PFC(NewFunc, OldFunc, ValueMap, Returns, NameSuffix, CodeInfo, TD); // Clone the entry block, and anything recursively reachable from it. std::vector<const BasicBlock*> CloneWorklist; CloneWorklist.push_back(&OldFunc->getEntryBlock()); while (!CloneWorklist.empty()) { const BasicBlock *BB = CloneWorklist.back(); CloneWorklist.pop_back(); PFC.CloneBlock(BB, CloneWorklist); } // Loop over all of the basic blocks in the old function. If the block was // reachable, we have cloned it and the old block is now in the value map: // insert it into the new function in the right order. If not, ignore it. // // Defer PHI resolution until rest of function is resolved. SmallVector<const PHINode*, 16> PHIToResolve; for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); BI != BE; ++BI) { BasicBlock *NewBB = cast_or_null<BasicBlock>(ValueMap[BI]); if (NewBB == 0) continue; // Dead block. // Add the new block to the new function. NewFunc->getBasicBlockList().push_back(NewBB); // Loop over all of the instructions in the block, fixing up operand // references as we go. This uses ValueMap to do all the hard work. // BasicBlock::iterator I = NewBB->begin(); LLVMContext &Context = OldFunc->getContext(); unsigned DbgKind = Context.getMetadata().getMDKind("dbg"); MDNode *TheCallMD = NULL; SmallVector<Value *, 4> MDVs; if (TheCall && TheCall->hasMetadata()) TheCallMD = Context.getMetadata().getMD(DbgKind, TheCall); // Handle PHI nodes specially, as we have to remove references to dead // blocks. if (PHINode *PN = dyn_cast<PHINode>(I)) { // Skip over all PHI nodes, remembering them for later. BasicBlock::const_iterator OldI = BI->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) { if (I->hasMetadata()) { if (TheCallMD) { if (MDNode *IMD = Context.getMetadata().getMD(DbgKind, I)) { MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD, Context); Context.getMetadata().addMD(DbgKind, NewMD, I); } } else { // The cloned instruction has dbg info but the call instruction // does not have dbg info. Remove dbg info from cloned instruction. Context.getMetadata().removeMD(DbgKind, I); } } PHIToResolve.push_back(cast<PHINode>(OldI)); } } // Otherwise, remap the rest of the instructions normally. 
for (; I != NewBB->end(); ++I) { if (I->hasMetadata()) { if (TheCallMD) { if (MDNode *IMD = Context.getMetadata().getMD(DbgKind, I)) { MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD, Context); Context.getMetadata().addMD(DbgKind, NewMD, I); } } else { // The cloned instruction has dbg info but the call instruction // does not have dbg info. Remove dbg info from cloned instruction. Context.getMetadata().removeMD(DbgKind, I); } } RemapInstruction(I, ValueMap); } } // Defer PHI resolution until rest of function is resolved, PHI resolution // requires the CFG to be up-to-date. for (unsigned phino = 0, e = PHIToResolve.size(); phino != e; ) { const PHINode *OPN = PHIToResolve[phino]; unsigned NumPreds = OPN->getNumIncomingValues(); const BasicBlock *OldBB = OPN->getParent(); BasicBlock *NewBB = cast<BasicBlock>(ValueMap[OldBB]); // Map operands for blocks that are live and remove operands for blocks // that are dead. for (; phino != PHIToResolve.size() && PHIToResolve[phino]->getParent() == OldBB; ++phino) { OPN = PHIToResolve[phino]; PHINode *PN = cast<PHINode>(ValueMap[OPN]); for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) { if (BasicBlock *MappedBlock = cast_or_null<BasicBlock>(ValueMap[PN->getIncomingBlock(pred)])) { Value *InVal = MapValue(PN->getIncomingValue(pred), ValueMap); assert(InVal && "Unknown input value?"); PN->setIncomingValue(pred, InVal); PN->setIncomingBlock(pred, MappedBlock); } else { PN->removeIncomingValue(pred, false); --pred, --e; // Revisit the next entry. } } } // The loop above has removed PHI entries for those blocks that are dead // and has updated others. However, if a block is live (i.e. copied over) // but its terminator has been changed to not go to this block, then our // phi nodes will have invalid entries. Update the PHI nodes in this // case. PHINode *PN = cast<PHINode>(NewBB->begin()); NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB)); if (NumPreds != PN->getNumIncomingValues()) { assert(NumPreds < PN->getNumIncomingValues()); // Count how many times each predecessor comes to this block. std::map<BasicBlock*, unsigned> PredCount; for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); PI != E; ++PI) --PredCount[*PI]; // Figure out how many entries to remove from each PHI. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) ++PredCount[PN->getIncomingBlock(i)]; // At this point, the excess predecessor entries are positive in the // map. Loop over all of the PHIs and remove excess predecessor // entries. BasicBlock::iterator I = NewBB->begin(); for (; (PN = dyn_cast<PHINode>(I)); ++I) { for (std::map<BasicBlock*, unsigned>::iterator PCI =PredCount.begin(), E = PredCount.end(); PCI != E; ++PCI) { BasicBlock *Pred = PCI->first; for (unsigned NumToRemove = PCI->second; NumToRemove; --NumToRemove) PN->removeIncomingValue(Pred, false); } } } // If the loops above have made these phi nodes have 0 or 1 operand, // replace them with undef or the input value. We must do this for // correctness, because 0-operand phis are not valid. PN = cast<PHINode>(NewBB->begin()); if (PN->getNumIncomingValues() == 0) { BasicBlock::iterator I = NewBB->begin(); BasicBlock::const_iterator OldI = OldBB->begin(); while ((PN = dyn_cast<PHINode>(I++))) { Value *NV = UndefValue::get(PN->getType()); PN->replaceAllUsesWith(NV); assert(ValueMap[OldI] == PN && "ValueMap mismatch"); ValueMap[OldI] = NV; PN->eraseFromParent(); ++OldI; } } // NOTE: We cannot eliminate single entry phi nodes here, because of // ValueMap. 
Single entry phi nodes can have multiple ValueMap entries // pointing at them. Thus, deleting one would require scanning the ValueMap // to update any entries that point to it. This would be // really slow. } // Now that the inlined function body has been fully constructed, go through // and zap unconditional fall-through branches. This happens all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. Function::iterator I = cast<BasicBlock>(ValueMap[&OldFunc->getEntryBlock()]); while (I != NewFunc->end()) { BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; continue; } // Note that we can't eliminate uncond branches if the destination has // single-entry PHI nodes. Eliminating the single-entry phi nodes would // require scanning the ValueMap to update any entries that point to the phi // node. BasicBlock *Dest = BI->getSuccessor(0); if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) { ++I; continue; } // We know all single-entry PHI nodes in the inlined function have been // removed, so we just need to splice the blocks. BI->eraseFromParent(); // Move all the instructions in the succ to the pred. I->getInstList().splice(I->end(), Dest->getInstList()); // Make all PHI nodes that referred to Dest now refer to I as their source. Dest->replaceAllUsesWith(I); // Remove the dest block. Dest->eraseFromParent(); // Do not increment I, iteratively merge all things this block branches to. } }
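// Hypothetical motivating example (not from the source): when a call with constant arguments
// is inlined, the on-the-fly constant propagation above lets the cloner skip whole branches
// instead of copying them and relying on later DCE.
static int pick(int debug, int x) {
  if (debug)
    return x * 7 + 3;   // never cloned when inlining pick(0, x)
  return x;
}

int caller(int x) { return pick(0, x); } // effectively just "return x;"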
void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, bool UseCalls) { BasicBlock::iterator IP = BB.getFirstInsertionPt(), BE = BB.end(); // Skip static allocas at the top of the entry block so they don't become // dynamic when we split the block. If we used our optimized stack layout, // then there will only be one alloca and it will come first. for (; IP != BE; ++IP) { AllocaInst *AI = dyn_cast<AllocaInst>(IP); if (!AI || !AI->isStaticAlloca()) break; } bool IsEntryBB = &BB == &F.getEntryBlock(); DebugLoc EntryLoc; if (IsEntryBB) { if (auto SP = getDISubprogram(&F)) EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP); } else { EntryLoc = IP->getDebugLoc(); } IRBuilder<> IRB(IP); IRB.SetCurrentDebugLocation(EntryLoc); SmallVector<Value *, 1> Indices; Value *GuardP = IRB.CreateAdd( IRB.CreatePointerCast(GuardArray, IntptrTy), ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4)); Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy); if (UseCalls) { IRB.CreateCall(SanCovWithCheckFunction, GuardP); } else { LoadInst *Load = IRB.CreateLoad(GuardP); Load->setAtomic(Monotonic); Load->setAlignment(4); SetNoSanitizeMetadata(Load); Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load); Instruction *Ins = SplitBlockAndInsertIfThen( Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); IRB.SetInsertPoint(Ins); IRB.SetCurrentDebugLocation(EntryLoc); // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC. IRB.CreateCall(SanCovFunction, GuardP); IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge. } if (Options.Use8bitCounters) { IRB.SetInsertPoint(IP); Value *P = IRB.CreateAdd( IRB.CreatePointerCast(EightBitCounterArray, IntptrTy), ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1)); P = IRB.CreateIntToPtr(P, IRB.getInt8PtrTy()); LoadInst *LI = IRB.CreateLoad(P); Value *Inc = IRB.CreateAdd(LI, ConstantInt::get(IRB.getInt8Ty(), 1)); StoreInst *SI = IRB.CreateStore(Inc, P); SetNoSanitizeMetadata(LI); SetNoSanitizeMetadata(SI); } if (Options.TraceBB) { // Experimental support for tracing. // Insert a callback with the same guard variable as used for coverage. IRB.SetInsertPoint(IP); IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP); } }
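// Rough source-level sketch (hedged: the pass emits IR directly, and the real runtime entry
// point __sanitizer_cov is only modeled by a stub here) of what the non-UseCalls path injects
// at the top of each instrumented block: a relaxed load of the per-block guard, a cheap sign
// test, and a callback the first time the block executes.
#include <stdio.h>

static void sanitizer_cov_stub(int *guard) { // stand-in for the runtime hook
  *guard = 1;                                // mark this block as covered
  printf("covered block, guard at %p\n", (void *)guard);
}

static int guard_for_this_block;             // one slot of the guard array

void instrumented_block(void) {
  if (guard_for_this_block <= 0)             // IR form: icmp sge 0, %guard
    sanitizer_cov_stub(&guard_for_this_block);
  /* ... original block body ... */
}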
/// isSafeToLoadUnconditionally - Return true if we know that executing a load /// from this value cannot trap. If it is not obviously safe to load from the /// specified pointer, we do a quick local scan of the basic block containing /// ScanFrom, to determine if the address is already accessed. bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, unsigned Align, const TargetData *TD) { uint64_t ByteOffset = 0; Value *Base = V; if (TD) Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset); const Type *BaseType = 0; unsigned BaseAlign = 0; if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { // An alloca is safe to load from as long as it is suitably aligned. BaseType = AI->getAllocatedType(); BaseAlign = AI->getAlignment(); } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Base)) { // Global variables are safe to load from but their size cannot be // guaranteed if they are overridden. if (!isa<GlobalAlias>(GV) && !GV->mayBeOverridden()) { BaseType = GV->getType()->getElementType(); BaseAlign = GV->getAlignment(); } } if (BaseType && BaseType->isSized()) { if (TD && BaseAlign == 0) BaseAlign = TD->getPrefTypeAlignment(BaseType); if (Align <= BaseAlign) { if (!TD) return true; // Loading directly from an alloca or global is OK. // Check if the load is within the bounds of the underlying object. const PointerType *AddrTy = cast<PointerType>(V->getType()); uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType()); if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) && (Align == 0 || (ByteOffset % Align) == 0)) return true; } } // Otherwise, be a little bit aggressive by scanning the local block where we // want to check to see if the pointer is already being loaded or stored // from/to. If so, the previous load or store would have already trapped, // so there is no harm doing an extra load (also, CSE will later eliminate // the load entirely). BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin(); while (BBI != E) { --BBI; // If we see a free or a call which may write to memory (i.e. which might do // a free) the pointer could be marked invalid. if (isa<CallInst>(BBI) && BBI->mayWriteToMemory() && !isa<DbgInfoIntrinsic>(BBI)) return false; if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { if (AreEquivalentAddressValues(LI->getOperand(0), V)) return true; } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { if (AreEquivalentAddressValues(SI->getOperand(1), V)) return true; } } return false; }
/// Evaluate all instructions in block BB, returning true if successful, false /// if we can't evaluate it. NewBB returns the next BB that control flows into, /// or null upon return. bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB) { // This is the main evaluation loop. while (1) { Constant *InstResult = nullptr; DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n"); if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) { if (!SI->isSimple()) { DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n"); return false; // no volatile/atomic accesses. } Constant *Ptr = getVal(SI->getOperand(1)); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr); Ptr = ConstantFoldConstantExpression(CE, DL, TLI); DEBUG(dbgs() << "; To: " << *Ptr << "\n"); } if (!isSimpleEnoughPointerToCommit(Ptr)) { // If this is too complex for us to commit, reject it. DEBUG(dbgs() << "Pointer is too complex for us to evaluate store."); return false; } Constant *Val = getVal(SI->getOperand(0)); // If this might be too difficult for the backend to handle (e.g. the addr // of one global variable divided by another) then we can't commit it. if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) { DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val << "\n"); return false; } if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { if (CE->getOpcode() == Instruction::BitCast) { DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n"); // If we're evaluating a store through a bitcast, then we need // to pull the bitcast off the pointer type and push it onto the // stored value. Ptr = CE->getOperand(0); Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType(); // In order to push the bitcast onto the stored value, a bitcast // from NewTy to Val's type must be legal. If it's not, we can try // introspecting NewTy to find a legal conversion. while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) { // If NewTy is a struct, we can convert the pointer to the struct // into a pointer to its first member. // FIXME: This could be extended to support arrays as well. if (StructType *STy = dyn_cast<StructType>(NewTy)) { NewTy = STy->getTypeAtIndex(0U); IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32); Constant *IdxZero = ConstantInt::get(IdxTy, 0, false); Constant * const IdxList[] = {IdxZero, IdxZero}; Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) Ptr = ConstantFoldConstantExpression(CE, DL, TLI); // If we can't improve the situation by introspecting NewTy, // we have to give up. } else { DEBUG(dbgs() << "Failed to bitcast constant ptr, can not " "evaluate.\n"); return false; } } // If we found compatible types, go ahead and push the bitcast // onto the stored value. Val = ConstantExpr::getBitCast(Val, NewTy); DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n"); } } MutatedMemory[Ptr] = Val; } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) { InstResult = ConstantExpr::get(BO->getOpcode(), getVal(BO->getOperand(0)), getVal(BO->getOperand(1))); DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult << "\n"); } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) { InstResult = ConstantExpr::getCompare(CI->getPredicate(), getVal(CI->getOperand(0)), getVal(CI->getOperand(1))); DEBUG(dbgs() << "Found a CmpInst! 
Simplifying: " << *InstResult << "\n"); } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) { InstResult = ConstantExpr::getCast(CI->getOpcode(), getVal(CI->getOperand(0)), CI->getType()); DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult << "\n"); } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) { InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)), getVal(SI->getOperand(1)), getVal(SI->getOperand(2))); DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult << "\n"); } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurInst)) { InstResult = ConstantExpr::getExtractValue( getVal(EVI->getAggregateOperand()), EVI->getIndices()); DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: " << *InstResult << "\n"); } else if (auto *IVI = dyn_cast<InsertValueInst>(CurInst)) { InstResult = ConstantExpr::getInsertValue( getVal(IVI->getAggregateOperand()), getVal(IVI->getInsertedValueOperand()), IVI->getIndices()); DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: " << *InstResult << "\n"); } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) { Constant *P = getVal(GEP->getOperand(0)); SmallVector<Constant*, 8> GEPOps; for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e; ++i) GEPOps.push_back(getVal(*i)); InstResult = ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps, cast<GEPOperator>(GEP)->isInBounds()); DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult << "\n"); } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) { if (!LI->isSimple()) { DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n"); return false; // no volatile/atomic accesses. } Constant *Ptr = getVal(LI->getOperand(0)); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) { Ptr = ConstantFoldConstantExpression(CE, DL, TLI); DEBUG(dbgs() << "Found a constant pointer expression, constant " "folding: " << *Ptr << "\n"); } InstResult = ComputeLoadResult(Ptr); if (!InstResult) { DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load." "\n"); return false; // Could not evaluate load. } DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n"); } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) { if (AI->isArrayAllocation()) { DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n"); return false; // Cannot handle array allocs. } Type *Ty = AI->getAllocatedType(); AllocaTmps.push_back( make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty), AI->getName())); InstResult = AllocaTmps.back().get(); DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n"); } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) { CallSite CS(&*CurInst); // Debug info can safely be ignored here. if (isa<DbgInfoIntrinsic>(CS.getInstruction())) { DEBUG(dbgs() << "Ignoring debug info.\n"); ++CurInst; continue; } // Cannot handle inline asm. if (isa<InlineAsm>(CS.getCalledValue())) { DEBUG(dbgs() << "Found inline asm, can not evaluate.\n"); return false; } if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) { if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) { if (MSI->isVolatile()) { DEBUG(dbgs() << "Can not optimize a volatile memset " << "intrinsic.\n"); return false; } Constant *Ptr = getVal(MSI->getDest()); Constant *Val = getVal(MSI->getValue()); Constant *DestVal = ComputeLoadResult(getVal(Ptr)); if (Val->isNullValue() && DestVal && DestVal->isNullValue()) { // This memset is a no-op. 
DEBUG(dbgs() << "Ignoring no-op memset.\n"); ++CurInst; continue; } } if (II->getIntrinsicID() == Intrinsic::lifetime_start || II->getIntrinsicID() == Intrinsic::lifetime_end) { DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n"); ++CurInst; continue; } if (II->getIntrinsicID() == Intrinsic::invariant_start) { // We don't insert an entry into Values, as it doesn't have a // meaningful return value. if (!II->use_empty()) { DEBUG(dbgs() << "Found unused invariant_start. Can't evaluate.\n"); return false; } ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0)); Value *PtrArg = getVal(II->getArgOperand(1)); Value *Ptr = PtrArg->stripPointerCasts(); if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) { Type *ElemTy = GV->getValueType(); if (!Size->isAllOnesValue() && Size->getValue().getLimitedValue() >= DL.getTypeStoreSize(ElemTy)) { Invariants.insert(GV); DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV << "\n"); } else { DEBUG(dbgs() << "Found a global var, but can not treat it as an " "invariant.\n"); } } // Continue even if we do nothing. ++CurInst; continue; } else if (II->getIntrinsicID() == Intrinsic::assume) { DEBUG(dbgs() << "Skipping assume intrinsic.\n"); ++CurInst; continue; } DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n"); return false; } // Resolve function pointers. Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue())); if (!Callee || Callee->isInterposable()) { DEBUG(dbgs() << "Can not resolve function pointer.\n"); return false; // Cannot resolve. } SmallVector<Constant*, 8> Formals; for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) Formals.push_back(getVal(*i)); if (Callee->isDeclaration()) { // If this is a function we can constant fold, do it. if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) { InstResult = C; DEBUG(dbgs() << "Constant folded function call. Result: " << *InstResult << "\n"); } else { DEBUG(dbgs() << "Can not constant fold function call.\n"); return false; } } else { if (Callee->getFunctionType()->isVarArg()) { DEBUG(dbgs() << "Can not constant fold vararg function call.\n"); return false; } Constant *RetVal = nullptr; // Execute the call, if successful, use the return value. ValueStack.emplace_back(); if (!EvaluateFunction(Callee, RetVal, Formals)) { DEBUG(dbgs() << "Failed to evaluate function.\n"); return false; } ValueStack.pop_back(); InstResult = RetVal; if (InstResult) { DEBUG(dbgs() << "Successfully evaluated function. Result: " << *InstResult << "\n\n"); } else { DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n"); } } } else if (isa<TerminatorInst>(CurInst)) { DEBUG(dbgs() << "Found a terminator instruction.\n"); if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) { if (BI->isUnconditional()) { NextBB = BI->getSuccessor(0); } else { ConstantInt *Cond = dyn_cast<ConstantInt>(getVal(BI->getCondition())); if (!Cond) return false; // Cannot determine. NextBB = BI->getSuccessor(!Cond->getZExtValue()); } } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) { ConstantInt *Val = dyn_cast<ConstantInt>(getVal(SI->getCondition())); if (!Val) return false; // Cannot determine. NextBB = SI->findCaseValue(Val).getCaseSuccessor(); } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) { Value *Val = getVal(IBI->getAddress())->stripPointerCasts(); if (BlockAddress *BA = dyn_cast<BlockAddress>(Val)) NextBB = BA->getBasicBlock(); else return false; // Cannot determine. 
} else if (isa<ReturnInst>(CurInst)) { NextBB = nullptr; } else { // invoke, unwind, resume, unreachable. DEBUG(dbgs() << "Can not handle terminator."); return false; // Cannot handle this terminator. } // We succeeded at evaluating this block! DEBUG(dbgs() << "Successfully evaluated block.\n"); return true; } else { // Did not know how to evaluate this! DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction." "\n"); return false; } if (!CurInst->use_empty()) { if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult)) InstResult = ConstantFoldConstantExpression(CE, DL, TLI); setVal(&*CurInst, InstResult); } // If we just processed an invoke, we finished evaluating the block. if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) { NextBB = II->getNormalDest(); DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n"); return true; } // Advance program counter. ++CurInst; } }
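// Hypothetical example (not from the source) of the kind of code this evaluator is aimed at:
// a static constructor whose loads, stores, and calls can all be executed at compile time, so
// the dynamic initializer can be replaced by a constant initializer for `table`.
static int square(int x) { return x * x; }

struct Table {
  int v[4];
  Table() {                   // becomes a global constructor in IR
    for (int i = 0; i < 4; ++i)
      v[i] = square(i + 1);   // every store targets a simple global location
  }
};

static Table table;           // foldable to { 1, 4, 9, 16 }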
void GNUstep::IMPCacher::SpeculativelyInline(Instruction *call, Function *function) { BasicBlock *beforeCallBB = call->getParent(); BasicBlock *callBB = SplitBlock(beforeCallBB, call, Owner); BasicBlock *inlineBB = BasicBlock::Create(Context, "inline", callBB->getParent()); BasicBlock::iterator iter = call; iter++; BasicBlock *afterCallBB = SplitBlock(iter->getParent(), iter, Owner); removeTerminator(beforeCallBB); // Put a branch before the call, testing whether the callee really is the // function IRBuilder<> B = IRBuilder<>(beforeCallBB); Value *callee = isa<CallInst>(call) ? cast<CallInst>(call)->getCalledValue() : cast<InvokeInst>(call)->getCalledValue(); const FunctionType *FTy = function->getFunctionType(); const FunctionType *calleeTy = cast<FunctionType>( cast<PointerType>(callee->getType())->getElementType()); if (calleeTy != FTy) { callee = B.CreateBitCast(callee, function->getType()); } Value *isInlineValid = B.CreateICmpEQ(callee, function); B.CreateCondBr(isInlineValid, inlineBB, callBB); // In the inline BB, add a copy of the call, but this time calling the real // version. Instruction *inlineCall = call->clone(); Value *inlineResult= inlineCall; inlineBB->getInstList().push_back(inlineCall); B.SetInsertPoint(inlineBB); if (calleeTy != FTy) { for (unsigned i=0 ; i<FTy->getNumParams() ; i++) { LLVMType *callType = calleeTy->getParamType(i); LLVMType *argType = FTy->getParamType(i); if (callType != argType) { inlineCall->setOperand(i, new BitCastInst(inlineCall->getOperand(i), argType, "", inlineCall)); } } if (FTy->getReturnType() != calleeTy->getReturnType()) { if (FTy->getReturnType() == Type::getVoidTy(Context)) { inlineResult = Constant::getNullValue(calleeTy->getReturnType()); } else { inlineResult = new BitCastInst(inlineCall, calleeTy->getReturnType(), "", inlineBB); } } } B.CreateBr(afterCallBB); // Unify the return values if (call->getType() != Type::getVoidTy(Context)) { PHINode *phi = CreatePHI(call->getType(), 2, "", afterCallBB->begin()); call->replaceAllUsesWith(phi); phi->addIncoming(call, callBB); phi->addIncoming(inlineResult, inlineBB); } // Really do the real inlining InlineFunctionInfo IFI(0, 0); if (CallInst *c = dyn_cast<CallInst>(inlineCall)) { c->setCalledFunction(function); InlineFunction(c, IFI); } else if (InvokeInst *c = dyn_cast<InvokeInst>(inlineCall)) { c->setCalledFunction(function); InlineFunction(c, IFI); } }
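// Source-level sketch (hedged; the pass builds this control flow in IR) of the speculative
// inlining above: guard the inlined body with a pointer comparison against the expected
// callee and keep the original indirect call as the fallback. All names below are
// hypothetical.
typedef int (*impl_t)(int);

static int fast_path(int x) { return x + 1; } // the function we speculate on

int dispatch(impl_t looked_up, int arg) {
  int result;
  if (looked_up == fast_path)
    result = fast_path(arg);   // direct call, now a candidate for real inlining
  else
    result = looked_up(arg);   // original indirect call
  return result;               // phi of the two results
}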
/// \brief Check if executing a load of this pointer value cannot trap. /// /// If DT and ScanFrom are specified this method performs context-sensitive /// analysis and returns true if it is safe to load immediately before ScanFrom. /// /// If it is not obviously safe to load from the specified pointer, we do /// a quick local scan of the basic block containing \c ScanFrom, to determine /// if the address is already accessed. /// /// This uses the pointee type to determine how many bytes need to be safe to /// load from the pointer. bool llvm::isSafeToLoadUnconditionally(Value *V, unsigned Align, const DataLayout &DL, Instruction *ScanFrom, const DominatorTree *DT) { // Zero alignment means that the load has the ABI alignment for the target if (Align == 0) Align = DL.getABITypeAlignment(V->getType()->getPointerElementType()); assert(isPowerOf2_32(Align)); // If DT is not specified we can't make a context-sensitive query const Instruction* CtxI = DT ? ScanFrom : nullptr; if (isDereferenceableAndAlignedPointer(V, Align, DL, CtxI, DT)) return true; int64_t ByteOffset = 0; Value *Base = V; Base = GetPointerBaseWithConstantOffset(V, ByteOffset, DL); if (ByteOffset < 0) // out of bounds return false; Type *BaseType = nullptr; unsigned BaseAlign = 0; if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) { // An alloca is safe to load from as long as it is suitably aligned. BaseType = AI->getAllocatedType(); BaseAlign = AI->getAlignment(); } else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) { // Global variables are not necessarily safe to load from if they are // interposed arbitrarily. Their size may change or they may be weak and // require a test to determine if they were in fact provided. if (!GV->isInterposable()) { BaseType = GV->getType()->getElementType(); BaseAlign = GV->getAlignment(); } } PointerType *AddrTy = cast<PointerType>(V->getType()); uint64_t LoadSize = DL.getTypeStoreSize(AddrTy->getElementType()); // If we found a base allocated type from either an alloca or global variable, // try to see if we are definitively within the allocated region. We need to // know the size of the base type and the loaded type to do anything in this // case. if (BaseType && BaseType->isSized()) { if (BaseAlign == 0) BaseAlign = DL.getPrefTypeAlignment(BaseType); if (Align <= BaseAlign) { // Check if the load is within the bounds of the underlying object. if (ByteOffset + LoadSize <= DL.getTypeAllocSize(BaseType) && ((ByteOffset % Align) == 0)) return true; } } if (!ScanFrom) return false; // Otherwise, be a little bit aggressive by scanning the local block where we // want to check to see if the pointer is already being loaded or stored // from/to. If so, the previous load or store would have already trapped, // so there is no harm doing an extra load (also, CSE will later eliminate // the load entirely). BasicBlock::iterator BBI = ScanFrom->getIterator(), E = ScanFrom->getParent()->begin(); // We can at least always strip pointer casts even though we can't use the // base here. V = V->stripPointerCasts(); while (BBI != E) { --BBI; // If we see a free or a call which may write to memory (i.e. which might do // a free) the pointer could be marked invalid. 
if (isa<CallInst>(BBI) && BBI->mayWriteToMemory() && !isa<DbgInfoIntrinsic>(BBI)) return false; Value *AccessedPtr; unsigned AccessedAlign; if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) { AccessedPtr = LI->getPointerOperand(); AccessedAlign = LI->getAlignment(); } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { AccessedPtr = SI->getPointerOperand(); AccessedAlign = SI->getAlignment(); } else continue; Type *AccessedTy = AccessedPtr->getType()->getPointerElementType(); if (AccessedAlign == 0) AccessedAlign = DL.getABITypeAlignment(AccessedTy); if (AccessedAlign < Align) continue; // Handle trivial cases. if (AccessedPtr == V) return true; if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) && LoadSize <= DL.getTypeStoreSize(AccessedTy)) return true; } return false; }
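// Hypothetical illustration (not from the source) of the block-scan fallback: the store to
// *p would already have trapped if p were invalid, so a client asking about the load inside
// the branch gets a positive answer and may speculate it above the branch (for example to
// form a select).
int update(int *p, int c) {
  *p = 42;          // earlier access to the same (cast-stripped) pointer
  int v = 0;
  if (c)
    v = *p;         // safe to hoist: it cannot trap if the store did not
  return v;
}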
void GNUstep::IMPCacher::CacheLookup(Instruction *lookup, Value *slot, Value *version, bool isSuperMessage) { // If this IMP is already cached, don't cache it again. if (lookup->getMetadata(IMPCacheFlagKind)) { return; } lookup->setMetadata(IMPCacheFlagKind, AlreadyCachedFlag); bool isInvoke = false; BasicBlock *beforeLookupBB = lookup->getParent(); BasicBlock *lookupBB = SplitBlock(beforeLookupBB, lookup, Owner); BasicBlock *lookupFinishedBB = lookupBB; BasicBlock *afterLookupBB; if (InvokeInst *inv = dyn_cast<InvokeInst>(lookup)) { afterLookupBB = inv->getNormalDest(); lookupFinishedBB = BasicBlock::Create(Context, "done_lookup", lookupBB->getParent()); CGBuilder B(lookupFinishedBB); B.CreateBr(afterLookupBB); inv->setNormalDest(lookupFinishedBB); isInvoke = true; } else { BasicBlock::iterator iter = lookup; iter++; afterLookupBB = SplitBlock(iter->getParent(), iter, Owner); } removeTerminator(beforeLookupBB); CGBuilder B = CGBuilder(beforeLookupBB); // Load the slot and check that neither it nor the version is 0. Value *versionValue = B.CreateLoad(version); Value *receiverPtr = lookup->getOperand(0); Value *receiver = receiverPtr; if (!isSuperMessage) { receiver = B.CreateLoad(receiverPtr); } // For small objects, we skip the cache entirely. // FIXME: Class messages are never to small objects... bool is64Bit = llvm::Module::Pointer64 == B.GetInsertBlock()->getParent()->getParent()->getPointerSize(); LLVMType *intPtrTy = is64Bit ? Type::getInt64Ty(Context) : Type::getInt32Ty(Context); // Receiver as an integer Value *receiverSmallObject = B.CreatePtrToInt(receiver, intPtrTy); // Receiver is a small object... receiverSmallObject = B.CreateAnd(receiverSmallObject, is64Bit ? 7 : 1); // Receiver is not a small object. receiverSmallObject = B.CreateICmpNE(receiverSmallObject, Constant::getNullValue(intPtrTy)); // Ideally, we'd call objc_msgSend() here, but for now just skip the cache // lookup Value *isCacheEmpty = B.CreateICmpEQ(versionValue, Constant::getNullValue(IntTy)); Value *receiverNil = B.CreateICmpEQ(receiver, Constant::getNullValue(receiver->getType())); isCacheEmpty = B.CreateOr(isCacheEmpty, receiverNil); isCacheEmpty = B.CreateOr(isCacheEmpty, receiverSmallObject); BasicBlock *cacheLookupBB = BasicBlock::Create(Context, "cache_check", lookupBB->getParent()); B.CreateCondBr(isCacheEmpty, lookupBB, cacheLookupBB); // Check the cache node is current B.SetInsertPoint(cacheLookupBB); Value *slotValue = B.CreateLoad(slot, "slot_value"); Value *slotVersion = B.CreateStructGEP(slotValue, 3); // Note: Volatile load because the slot version might have changed in // another thread. slotVersion = B.CreateLoad(slotVersion, true, "slot_version"); Value *slotCachedFor = B.CreateStructGEP(slotValue, 1); slotCachedFor = B.CreateLoad(slotCachedFor, true, "slot_owner"); Value *cls = B.CreateLoad(B.CreateBitCast(receiver, IdTy)); Value *isVersionCorrect = B.CreateICmpEQ(slotVersion, versionValue); Value *isOwnerCorrect = B.CreateICmpEQ(slotCachedFor, cls); Value *isSlotValid = B.CreateAnd(isVersionCorrect, isOwnerCorrect); // If this slot is still valid, skip the lookup. B.CreateCondBr(isSlotValid, afterLookupBB, lookupBB); // Perform the real lookup and cache the result removeTerminator(lookupFinishedBB); // Replace the looked up slot with the loaded one B.SetInsertPoint(afterLookupBB, afterLookupBB->begin()); PHINode *newLookup = IRBuilderCreatePHI(&B, lookup->getType(), 3, "new_lookup"); // Not volatile, so a redundant load elimination pass can do some phi // magic with this later. 
lookup->replaceAllUsesWith(newLookup); B.SetInsertPoint(lookupFinishedBB); Value * newReceiver = receiver; if (!isSuperMessage) { newReceiver = B.CreateLoad(receiverPtr); } BasicBlock *storeCacheBB = BasicBlock::Create(Context, "cache_store", lookupBB->getParent()); // Don't store the cached lookup if we are doing forwarding tricks. // Also skip caching small object messages for now Value *skipCacheWrite = B.CreateOr(B.CreateICmpNE(receiver, newReceiver), receiverSmallObject); skipCacheWrite = B.CreateOr(skipCacheWrite, receiverNil); B.CreateCondBr(skipCacheWrite, afterLookupBB, storeCacheBB); B.SetInsertPoint(storeCacheBB); // Store it even if the version is 0, because we always check that the // version is not 0 at the start and an occasional redundant store is // probably better than a branch every time. B.CreateStore(lookup, slot); B.CreateStore(B.CreateLoad(B.CreateStructGEP(lookup, 3)), version); cls = B.CreateLoad(B.CreateBitCast(receiver, IdTy)); B.CreateStore(cls, B.CreateStructGEP(lookup, 1)); B.CreateBr(afterLookupBB); newLookup->addIncoming(lookup, lookupFinishedBB); newLookup->addIncoming(slotValue, cacheLookupBB); newLookup->addIncoming(lookup, storeCacheBB); }
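// Source-level sketch (hedged: the real code builds IR, and the slot layout is simplified) of
// the cache built above: reuse the cached slot only when both its version and its owner class
// still match, otherwise run the slow lookup and refresh the cache. All names are
// illustrative.
struct Slot { void *owner; int version; void *method; };

static Slot *cached_lookup(Slot **slot, int *version, void *receiver_cls,
                           Slot *(*slow_lookup)(void)) {
  Slot *s = *slot;
  if (*version != 0 && s != 0 && s->version == *version &&
      s->owner == receiver_cls)
    return s;                          // cache hit: skip the lookup entirely
  Slot *fresh = slow_lookup();         // the real, expensive lookup
  *slot = fresh;                       // cache the slot ...
  *version = fresh->version;           // ... and the version it was valid for
  fresh->owner = receiver_cls;         // remember which class it was cached for
  return fresh;
}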
bool Substitution::substitute(Function *f) { Function *tmp = f; // Loop for the number of times we run the pass on the function int times = ObfTimes; do { for (Function::iterator bb = tmp->begin(); bb != tmp->end(); ++bb) { for (BasicBlock::iterator inst = bb->begin(); inst != bb->end(); ++inst) { if (inst->isBinaryOp()) { switch (inst->getOpcode()) { case BinaryOperator::Add: // case BinaryOperator::FAdd: // Substitute with random add operation (this->*funcAdd[llvm::cryptoutils->get_range(NUMBER_ADD_SUBST)])( cast<BinaryOperator>(inst)); ++Add; break; case BinaryOperator::Sub: // case BinaryOperator::FSub: // Substitute with random sub operation (this->*funcSub[llvm::cryptoutils->get_range(NUMBER_SUB_SUBST)])( cast<BinaryOperator>(inst)); ++Sub; break; case BinaryOperator::Mul: case BinaryOperator::FMul: //++Mul; break; case BinaryOperator::UDiv: case BinaryOperator::SDiv: case BinaryOperator::FDiv: //++Div; break; case BinaryOperator::URem: case BinaryOperator::SRem: case BinaryOperator::FRem: //++Rem; break; case Instruction::Shl: //++Shi; break; case Instruction::LShr: //++Shi; break; case Instruction::AShr: //++Shi; break; case Instruction::And: (this->* funcAnd[llvm::cryptoutils->get_range(2)])(cast<BinaryOperator>(inst)); ++And; break; case Instruction::Or: (this->* funcOr[llvm::cryptoutils->get_range(2)])(cast<BinaryOperator>(inst)); ++Or; break; case Instruction::Xor: (this->* funcXor[llvm::cryptoutils->get_range(2)])(cast<BinaryOperator>(inst)); ++Xor; break; default: break; } // End switch } // End isBinaryOp } // End for basic block } // End for Function } while (--times > 0); // for times return false; }
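// Hypothetical illustration of one such substitution (the concrete identities live in the
// funcAdd/funcSub/funcAnd/... tables, which are not shown here): rewrite an addition into an
// equivalent but less obvious form and check the identity exhaustively over a small range.
#include <cassert>
#include <cstdint>

static uint32_t obfuscated_add(uint32_t a, uint32_t b) {
  return a - (~b + 1u);   // a + b via two's-complement negation of b
}

int main() {
  for (uint32_t a = 0; a < 100; ++a)
    for (uint32_t b = 0; b < 100; ++b)
      assert(obfuscated_add(a, b) == a + b);
  return 0;
}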
void transform(Module &M,Function &f,vector <BasicBlock*> &BB1,vector <BasicBlock*> &BB2,vector<Instruction*>I1,vector<Instruction*>I2) { LLVMContext &context = M.getContext(); Instruction *br,*I; Type* Int1= Type::getInt8PtrTy(context); Type *typpe=Type::getInt32Ty(context); Type* voidd= Type::getVoidTy(context); vector <BasicBlock*> BB3,BB4,contt,tran; vector<Instruction*>inst1,inst2; BasicBlock *bt; Instruction *in; unsigned i=0; Function *under_over = cast<Function>(M.getOrInsertFunction ("llvm.x86.sse.stmxcsr",voidd,Int1,nullptr));// intrinsic used to read the FP status flags (overflow, underflow, ...) //*******************************take each basic block and its copy in the widest type******************** for (Function::iterator blocdebase = f.begin(), e = f.end(); blocdebase != e; ++blocdebase) { unsigned n=Search(BB1,blocdebase); if(n<BB1.size()) { BB3.push_back(BB1[n]); BB4.push_back(BB2[n]); } } //********************************take the values used by the instructions of each block ******************************* bt=f.begin(); for(unsigned i=0;i<BB3.size();i++) { for(BasicBlock::iterator instruction = BB3[i]->begin(), ee = BB3[i]->end(); instruction != ee; ++instruction) { if (dyn_cast<LoadInst>(instruction)) { // for load instructions unsigned n=Searchinst(I2,dyn_cast<Instruction>(instruction->getOperand(0))); unsigned n1=Searchinst(inst2,dyn_cast<Instruction>(instruction->getOperand(0))); if(n<I2.size()&&n1>inst2.size()) { inst2.push_back(I2[n]);// copied instruction inst1.push_back(I1[n]);// original instruction } } if (dyn_cast<StoreInst>(instruction)) { // for store instructions unsigned n=Searchinst(I2,dyn_cast<Instruction>(instruction->getOperand(0))); unsigned n1=Searchinst(inst2,dyn_cast<Instruction>(instruction->getOperand(0))); unsigned n2=Searchinst(I2,dyn_cast<Instruction>(instruction->getOperand(1))); unsigned n3=Searchinst(inst2,dyn_cast<Instruction>(instruction->getOperand(1))); if(n<I2.size()&&n1>inst2.size()){inst2.push_back(I2[n]);inst1.push_back(I1[n]);} else if(n2<I2.size()&&n3>inst2.size()){inst2.push_back(I2[n2]);inst1.push_back(I1[n2]);} } } } while(i<BB3.size()) { BasicBlock *cont = BasicBlock::Create(context, "continue", BB3[i]->getParent());// block that continues along the original path BasicBlock *transforms = BasicBlock::Create(context, "transforms", BB3[i]->getParent());// block that performs the transformation I= BB3[i]->getTerminator(); br=I->clone(); I->eraseFromParent(); //*********************************test to decide which path to take************************* AllocaInst *test = new AllocaInst(typpe,0, "test",BB3[i]); BitCastInst *newb= new BitCastInst(test,Int1," ",BB3[i]); CallInst::Create(under_over, newb, "",BB3[i]); LoadInst *Cov=new LoadInst(test,"",false,BB3[i]); Value *one = ConstantInt::get(Type::getInt32Ty(context), 63); Value *two = ConstantInt::get(Type::getInt32Ty(context), 16); Value *three = ConstantInt::get(Type::getInt32Ty(context), 8); BinaryOperator *te1 = BinaryOperator::Create(Instruction::And,Cov,one,"",BB3[i]); BinaryOperator *te2 = BinaryOperator::Create(Instruction::And,te1,two,"",BB3[i]); BinaryOperator *te3 = BinaryOperator::Create(Instruction::And,Cov,three,"",BB3[i]); BinaryOperator *te4 = BinaryOperator::Create(Instruction::Or,te3,te2,"",BB3[i]); Value *z = ConstantInt::get(Type::getInt32Ty(context), 0); Value *CondI = new ICmpInst(*BB3[i], ICmpInst::ICMP_NE, te4,z , "cond"); //************************************************************************ BranchInst::Create (transforms,cont ,CondI,BB3[i]);// branch to the transform block or fall through to the continue block 
BranchInst::Create(BB4[i],transforms);// branch to the copied block cont->getInstList().push_back(br);// branch to the next block contt.push_back(cont);// collect all the continue blocks tran.push_back(transforms);// collect all the transformation blocks i++; } BB3.clear(); BB4.clear(); i=0; while(i<contt.size()) { I= contt[i]->getTerminator(); int size= I1.size(); for(unsigned a=0;a<inst1.size();a++) { LoadInst *new_load = new LoadInst(inst2[a],"Nl",false,I);// load the copied value new StoreInst(new_load,inst1[a] , false,I);// store the copied value into the original one //********************************************************************** in= tran[i]->getTerminator(); LoadInst *new_load1 = new LoadInst(inst1[a],"e",false,in);// load the original value from the original block int id=Searchinst(I1,inst1[a]); AllocaInst *t=dyn_cast<AllocaInst>(I1[(id+(size/3))]); Type *T=t->getAllocatedType (); if(inst1[a]->getType()==I1[(id+(size/3))]->getType())// same type as the original value in the copied block { new StoreInst(new_load1,I1[(id+(size/3))] , false,4,in); } else // different types { FPExtInst *c= new FPExtInst (new_load1,T , "",in); new StoreInst(c,I1[(id+(size/3))] , false,in); } } i++; } inst1.clear(); contt.clear(); }
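// Hedged sketch of what the injected test computes: the pass stores the SSE status register
// through the llvm.x86.sse.stmxcsr intrinsic; ordinary code can read the same state with
// _mm_getcsr from <xmmintrin.h>. The branch to the copied block is taken only when the
// overflow (0x8) or underflow (0x10) exception flag is set, mirroring the masks above.
#include <xmmintrin.h>

static int needs_precise_recompute(void) {
  unsigned mxcsr = _mm_getcsr();        // same value stmxcsr writes to memory
  unsigned flags = mxcsr & 63u;         // low six bits: exception flags
  return (flags & (0x8u | 0x10u)) != 0; // overflow or underflow occurred
}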
void HeterotbbTransform::edit_template_function (Module &M,Function* F,Function* new_join,GlobalVariable *old_gb,Value *gb) { SmallVector<Value*, 16> Args; // Argument lists to the new call vector<Instruction *> toDelete; // old_gb->dump(); // gb->dump(); Constant *Ids[2]; for (Function::iterator BI=F->begin(),BE = F->end(); BI != BE; ++BI) { for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; ++II) { GetElementPtrInst *GEP; GlobalVariable *op; if (isa<CallInst>(II) || isa<InvokeInst>(II)) { CallSite CI(cast<Instruction>(II)); //replace dummy reduce with new reduce if(CI.getCalledFunction()->getName().equals("__join_reduce_hetero")) { Args.clear(); CastInst *newarg1 = CastInst::Create(Instruction::BitCast, CI.getArgument(0), new_join->arg_begin()->getType(), "arg1",CI.getInstruction()); Args.push_back(newarg1); CastInst *newarg2 = CastInst::Create(Instruction::BitCast, CI.getArgument(1), new_join->arg_begin()->getType(), "arg2", CI.getInstruction()); Args.push_back(newarg2); //no need to set attributes Instruction *NewCall = CallInst::Create(new_join, Args, "", CI.getInstruction()); cast<CallInst>(NewCall)->setCallingConv(CI.getCallingConv()); toDelete.push_back(CI.getInstruction()); DEBUG(dbgs()<<"Joins Replaced\n"); } } /* %arrayidx18 = getelementptr inbounds i32 addrspace(3)* getelementptr inbounds ([192 x i32] addrspace(3)* @opencl_kernel_join_name_local_arr, i32 0, i32 0), i64 %idxprom1 */ if((GEP = dyn_cast<GetElementPtrInst>(II)) /*&& (op = dyn_cast<GlobalVariable>(GEP->getOperand(0)))*/ /*&& (op->getName().equals("opencl_kernel_join_name_local_arr"))*/) { //II->dump(); Value *val= II->getOperand(0); if(Constant *op=dyn_cast<ConstantExpr>(val)) { //II->dump(); //II->getOperand(1)->dump(); /*Ids[0]=cast<Constant>(op->getOperand(1)); Ids[1]=cast<Constant>(op->getOperand(1)); Constant *new_op = ConstantExpr::getInBoundsGetElementPtr(cast<Constant>(gb),Ids,2); new_op->dump(); Instruction *inst = GetElementPtrInst::CreateInBounds(new_op, II->getOperand(1), II->getName()+"_temp",II); Value *Elts[] = {MDString::get(M.getContext(), "local_access")}; MDNode *Node = MDNode::get(M.getContext(), Elts); inst->setMetadata("local_access",Node); inst->dump(); II->replaceAllUsesWith(inst); toDelete.push_back(II); */ Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(M.getContext()), 0), ConstantInt::get(Type::getInt32Ty(M.getContext()), 0) }; //gb->getType()->dump(); //gb->dump(); Instruction *inst_= GetElementPtrInst::CreateInBounds(gb, Idxs, /*Idxs+2,*/ II->getName()+"_temp_",II); //inst_->dump(); Instruction *inst= GetElementPtrInst::CreateInBounds(inst_, II->getOperand(1), II->getName()+"_temp",II); Value *Elts[] = {MDString::get(M.getContext(), inst->getName())}; MDNode *Node = MDNode::get(M.getContext(), Elts); inst->setMetadata("local_access",Node); //inst->dump(); II->replaceAllUsesWith(inst); toDelete.push_back(II); } } } } while(!toDelete.empty()) { Instruction *g = toDelete.back(); toDelete.pop_back(); g->eraseFromParent(); } }
// Reroll the provided loop with respect to the provided induction variable.
// Generally, we're looking for a loop like this:
//
// %iv = phi [ (preheader, ...), (body, %iv.next) ]
// f(%iv)
// %iv.1 = add %iv, 1        <-- a root increment
// f(%iv.1)
// %iv.2 = add %iv, 2        <-- a root increment
// f(%iv.2)
// %iv.scale_m_1 = add %iv, scale-1  <-- a root increment
// f(%iv.scale_m_1)
// ...
// %iv.next = add %iv, scale
// %cmp = icmp(%iv, ...)
// br %cmp, header, exit
//
// Notably, we do not require that f(%iv), f(%iv.1), etc. be isolated groups
// of instructions. In other words, the instructions in f(%iv), f(%iv.1), etc.
// can be intermixed with each other. The restriction imposed by this
// algorithm is that the relative order of the isomorphic instructions in
// f(%iv), f(%iv.1), etc. be the same.
//
// First, we collect the use set of %iv, excluding the other increment roots.
// This gives us f(%iv). Then we iterate over the loop instructions (scale-1)
// times; on iteration i, having collected the use set of f(%iv.(i+1)), we:
//   - Ensure that the next unmatched instruction in f(%iv) is isomorphic to
//     the next unmatched instruction in f(%iv.(i+1)).
//   - Ensure that both matched instructions don't have any external users
//     (with the exception of last-in-chain reduction instructions).
//   - Track the (aliasing) write set, and other side effects, of all
//     instructions that belong to future iterations and come before the
//     matched instructions. If the matched instructions read from that write
//     set, then f(%iv) or f(%iv.(i+1)) has some dependency on instructions in
//     f(%iv.(j+1)) for some j > i, and we cannot reroll the loop. Similarly,
//     if any of these future instructions had side effects (could not be
//     speculatively executed), and so do the matched instructions, then we
//     cannot reorder those side-effect-producing instructions, and rerolling
//     fails.
//
// Finally, we make sure that every loop instruction is either a loop-increment
// root, part of the simple latch code, part of a validated reduction, part of
// f(%iv), or part of some f(%iv.i). If all of that is true (and all reductions
// have been validated), then we reroll the loop.
//
// A source-level sketch of this transformation appears after the end of this
// function.
bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
                        const SCEV *IterCount,
                        ReductionTracker &Reductions) {
  const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(IV));
  uint64_t Inc = cast<SCEVConstant>(RealIVSCEV->getOperand(1))->
    getValue()->getZExtValue();
  // The collection of loop increment instructions.
  SmallInstructionVector LoopIncs;
  uint64_t Scale = Inc;

  // The effective induction variable, IV, is normally also the real induction
  // variable. When we're dealing with a loop like:
  //   for (int i = 0; i < 500; ++i) {
  //     x[3*i] = ...;
  //     x[3*i+1] = ...;
  //     x[3*i+2] = ...;
  //   }
  // then the real IV is still i, but the effective IV is (3*i).
  Instruction *RealIV = IV;
  if (Inc == 1 && !findScaleFromMul(RealIV, Scale, IV, LoopIncs))
    return false;

  assert(Scale <= MaxInc && "Scale is too large");
  assert(Scale > 1 && "Scale must be at least 2");

  // The set of increment instructions for each increment value.
  SmallVector<SmallInstructionVector, 32> Roots(Scale-1);
  SmallInstructionSet AllRoots;
  if (!collectAllRoots(L, Inc, Scale, IV, Roots, AllRoots, LoopIncs))
    return false;

  DEBUG(dbgs() << "LRR: Found all root induction increments for: " <<
                  *RealIV << "\n");

  // An array of just the possible reductions for this scale factor. When we
  // collect the set of all users of some root instructions, these reduction
  // instructions are treated as 'final' (their uses are not considered).
  // This is important because we don't want the root use set to search down
  // the reduction chain.
  SmallInstructionSet PossibleRedSet;
  SmallInstructionSet PossibleRedLastSet, PossibleRedPHISet;
  Reductions.restrictToScale(Scale, PossibleRedSet, PossibleRedPHISet,
                             PossibleRedLastSet);

  // We now need to check for equivalence of the use graph of each root with
  // that of the primary induction variable (excluding the roots). Our goal
  // here is not to solve the full graph isomorphism problem, but rather to
  // catch common cases without a lot of work. As a result, we will assume
  // that the relative order of the instructions in each unrolled iteration
  // is the same (although we will not make an assumption about how the
  // different iterations are intermixed). Note that while the order must be
  // the same, the instructions may not be in the same basic block.
  SmallInstructionSet Exclude(AllRoots);
  Exclude.insert(LoopIncs.begin(), LoopIncs.end());

  DenseSet<Instruction *> BaseUseSet;
  collectInLoopUserSet(L, IV, Exclude, PossibleRedSet, BaseUseSet);

  DenseSet<Instruction *> AllRootUses;
  std::vector<DenseSet<Instruction *> > RootUseSets(Scale-1);

  bool MatchFailed = false;
  for (unsigned i = 0; i < Scale-1 && !MatchFailed; ++i) {
    DenseSet<Instruction *> &RootUseSet = RootUseSets[i];
    collectInLoopUserSet(L, Roots[i], SmallInstructionSet(),
                         PossibleRedSet, RootUseSet);

    DEBUG(dbgs() << "LRR: base use set size: " << BaseUseSet.size() <<
                    " vs. iteration increment " << (i+1) <<
                    " use set size: " << RootUseSet.size() << "\n");

    if (BaseUseSet.size() != RootUseSet.size()) {
      MatchFailed = true;
      break;
    }

    // In addition to regular aliasing information, we need to look for
    // instructions from later (future) iterations that have side effects
    // preventing us from reordering them past other instructions with side
    // effects.
    bool FutureSideEffects = false;
    AliasSetTracker AST(*AA);

    // The map between instructions in f(%iv.(i+1)) and f(%iv).
    DenseMap<Value *, Value *> BaseMap;

    assert(L->getNumBlocks() == 1 && "Cannot handle multi-block loops");
    for (BasicBlock::iterator J1 = Header->begin(), J2 = Header->begin(),
         JE = Header->end(); J1 != JE && !MatchFailed; ++J1) {
      if (cast<Instruction>(J1) == RealIV)
        continue;
      if (cast<Instruction>(J1) == IV)
        continue;
      if (!BaseUseSet.count(J1))
        continue;
      if (PossibleRedPHISet.count(J1)) // Skip reduction PHIs.
        continue;

      while (J2 != JE && (!RootUseSet.count(J2) ||
             std::find(Roots[i].begin(), Roots[i].end(), J2) !=
               Roots[i].end())) {
        // As we iterate through the instructions, instructions that don't
        // belong to previous iterations (or the base case) must belong to
        // future iterations. We want to track the alias set of writes from
        // previous iterations.
        if (!isa<PHINode>(J2) && !BaseUseSet.count(J2) &&
            !AllRootUses.count(J2)) {
          if (J2->mayWriteToMemory())
            AST.add(J2);

          // Note: This is specifically guarded by a check on isa<PHINode>,
          // which, while a valid (somewhat arbitrary) micro-optimization, is
          // needed because otherwise isSafeToSpeculativelyExecute returns
          // false on PHI nodes.
          if (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2, DL))
            FutureSideEffects = true;
        }

        ++J2;
      }

      if (!J1->isSameOperationAs(J2)) {
        DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                        " vs. " << *J2 << "\n");
        MatchFailed = true;
        break;
      }

      // Make sure that this instruction, which is in the use set of this
      // root instruction, does not also belong to the base set or the set of
      // some previous root instruction.
      if (BaseUseSet.count(J2) || AllRootUses.count(J2)) {
        DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                        " vs. " << *J2 << " (prev. case overlap)\n");
        MatchFailed = true;
        break;
      }

      // Make sure that we don't alias with any instruction in the alias set
      // tracker. If we do, then we depend on a future iteration, and we
      // can't reroll.
      if (J2->mayReadFromMemory()) {
        for (AliasSetTracker::iterator K = AST.begin(), KE = AST.end();
             K != KE && !MatchFailed; ++K) {
          if (K->aliasesUnknownInst(J2, *AA)) {
            DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                            " vs. " << *J2 << " (depends on future store)\n");
            MatchFailed = true;
            break;
          }
        }
      }

      // If we've passed an instruction from a future iteration that may have
      // side effects, and this instruction might also, then we can't reorder
      // them, and this matching fails. As an exception, we allow the alias
      // set tracker to handle regular (simple) load/store dependencies.
      if (FutureSideEffects &&
            ((!isSimpleLoadStore(J1) && !isSafeToSpeculativelyExecute(J1)) ||
             (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2)))) {
        DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                        " vs. " << *J2 <<
                        " (side effects prevent reordering)\n");
        MatchFailed = true;
        break;
      }

      // For instructions that are part of a reduction, if the operation is
      // associative, then don't bother matching the operands (because we
      // already know that the instructions are isomorphic, and the order
      // within the iteration does not matter). For non-associative reductions,
      // we do need to match the operands, because we need to reject
      // out-of-order instructions within an iteration!
      // For example (assume floating-point addition), we need to reject this:
      //   x += a[i]; x += b[i];
      //   x += a[i+1]; x += b[i+1];
      //   x += b[i+2]; x += a[i+2];
      bool InReduction = Reductions.isPairInSame(J1, J2);
      if (!(InReduction && J1->isAssociative())) {
        bool Swapped = false, SomeOpMatched = false;
        for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) {
          Value *Op2 = J2->getOperand(j);

          // If this is part of a reduction (and the operation is not
          // associative), then we match all operands, but not those that are
          // part of the reduction.
          if (InReduction)
            if (Instruction *Op2I = dyn_cast<Instruction>(Op2))
              if (Reductions.isPairInSame(J2, Op2I))
                continue;

          DenseMap<Value *, Value *>::iterator BMI = BaseMap.find(Op2);
          if (BMI != BaseMap.end())
            Op2 = BMI->second;
          else if (std::find(Roots[i].begin(), Roots[i].end(),
                             (Instruction*) Op2) != Roots[i].end())
            Op2 = IV;

          if (J1->getOperand(Swapped ? unsigned(!j) : j) != Op2) {
            // If we've not already decided to swap the matched operands, and
            // we've not already matched our first operand (note that we could
            // have skipped matching the first operand because it is part of a
            // reduction above), and the instruction is commutative, then try
            // the swapped match.
            if (!Swapped && J1->isCommutative() && !SomeOpMatched &&
                J1->getOperand(!j) == Op2) {
              Swapped = true;
            } else {
              DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                              " vs. " << *J2 << " (operand " << j << ")\n");
              MatchFailed = true;
              break;
            }
          }

          SomeOpMatched = true;
        }
      }

      if ((!PossibleRedLastSet.count(J1) && hasUsesOutsideLoop(J1, L)) ||
          (!PossibleRedLastSet.count(J2) && hasUsesOutsideLoop(J2, L))) {
        DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 <<
                        " vs. " << *J2 << " (uses outside loop)\n");
        MatchFailed = true;
        break;
      }

      if (!MatchFailed)
        BaseMap.insert(std::pair<Value *, Value *>(J2, J1));

      AllRootUses.insert(J2);
      Reductions.recordPair(J1, J2, i+1);

      ++J2;
    }
  }

  if (MatchFailed)
    return false;

  DEBUG(dbgs() << "LRR: Matched all iteration increments for " <<
                  *RealIV << "\n");

  DenseSet<Instruction *> LoopIncUseSet;
  collectInLoopUserSet(L, LoopIncs, SmallInstructionSet(),
                       SmallInstructionSet(), LoopIncUseSet);
  DEBUG(dbgs() << "LRR: Loop increment set size: " <<
                  LoopIncUseSet.size() << "\n");

  // Make sure that all instructions in the loop have been included in some
  // use set.
  for (BasicBlock::iterator J = Header->begin(), JE = Header->end();
       J != JE; ++J) {
    if (isa<DbgInfoIntrinsic>(J))
      continue;
    if (cast<Instruction>(J) == RealIV)
      continue;
    if (cast<Instruction>(J) == IV)
      continue;
    if (BaseUseSet.count(J) || AllRootUses.count(J) ||
        (LoopIncUseSet.count(J) && (J->isTerminator() ||
                                    isSafeToSpeculativelyExecute(J, DL))))
      continue;

    if (AllRoots.count(J))
      continue;

    if (Reductions.isSelectedPHI(J))
      continue;

    DEBUG(dbgs() << "LRR: aborting reroll based on " << *RealIV <<
                    " unprocessed instruction found: " << *J << "\n");
    MatchFailed = true;
    break;
  }

  if (MatchFailed)
    return false;

  DEBUG(dbgs() << "LRR: all instructions processed from " <<
                  *RealIV << "\n");

  if (!Reductions.validateSelected())
    return false;

  // At this point, we've validated the rerolling, and we're committed to
  // making changes!

  Reductions.replaceSelected();

  // Remove instructions associated with non-base iterations.
  for (BasicBlock::reverse_iterator J = Header->rbegin();
       J != Header->rend();) {
    if (AllRootUses.count(&*J)) {
      Instruction *D = &*J;
      DEBUG(dbgs() << "LRR: removing: " << *D << "\n");
      D->eraseFromParent();
      continue;
    }

    ++J;
  }

  // Insert the new induction variable.
  const SCEV *Start = RealIVSCEV->getStart();
  if (Inc == 1)
    Start = SE->getMulExpr(Start,
                           SE->getConstant(Start->getType(), Scale));
  const SCEVAddRecExpr *H =
    cast<SCEVAddRecExpr>(SE->getAddRecExpr(Start,
                           SE->getConstant(RealIVSCEV->getType(), 1),
                           L, SCEV::FlagAnyWrap));

  { // Limit the lifetime of SCEVExpander.
    SCEVExpander Expander(*SE, "reroll");
    Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin());

    for (DenseSet<Instruction *>::iterator J = BaseUseSet.begin(),
         JE = BaseUseSet.end(); J != JE; ++J)
      (*J)->replaceUsesOfWith(IV, NewIV);

    if (BranchInst *BI = dyn_cast<BranchInst>(Header->getTerminator())) {
      if (LoopIncUseSet.count(BI)) {
        const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
        if (Inc == 1)
          ICSCEV =
            SE->getMulExpr(ICSCEV, SE->getConstant(ICSCEV->getType(), Scale));

        // Iteration count SCEV minus 1
        const SCEV *ICMinus1SCEV =
          SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1));

        Value *ICMinus1; // Iteration count minus 1
        if (isa<SCEVConstant>(ICMinus1SCEV)) {
          ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), BI);
        } else {
          BasicBlock *Preheader = L->getLoopPreheader();
          if (!Preheader)
            Preheader = InsertPreheaderForLoop(L, this);

          ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(),
                                            Preheader->getTerminator());
        }

        Value *Cond = new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinus1,
                                   "exitcond");
        BI->setCondition(Cond);

        if (BI->getSuccessor(1) != Header)
          BI->swapSuccessors();
      }
    }
  }

  SimplifyInstructionsInBlock(Header, DL, TLI);
  DeleteDeadPHIs(Header, TLI);
  ++NumRerolledLoops;
  return true;
}
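// Illustrative sketch only (not part of the pass): a source-level view of the
// transformation that reroll() performs, as referenced in the comment above
// the function. The names `a`, `b`, and `alpha`, and the trip count, are made
// up for this example.
static void unrolledByThree(float *a, const float *b, float alpha) {
  // What reroll() looks for: one IV stepped by Scale (3 here) and three
  // isomorphic, dependence-free bodies using i, i+1, and i+2.
  for (int i = 0; i < 3200; i += 3) {
    a[i]     += alpha * b[i];
    a[i + 1] += alpha * b[i + 1];
    a[i + 2] += alpha * b[i + 2];
  }
}

static void rerolled(float *a, const float *b, float alpha) {
  // What the loop becomes after rerolling: a unit-step IV and a single body,
  // with the exit condition rewritten against the scaled iteration count.
  for (int i = 0; i < 3200; ++i)
    a[i] += alpha * b[i];
}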