C++ (Cpp) LoadInst::replaceAllUsesWith Examples

Programming Language: C++ (Cpp)

Class/Type: LoadInst

Method/Function: replaceAllUsesWith

Examples at hotexamples.com: 14

C++ (Cpp) LoadInst::replaceAllUsesWith - 14 examples found. These are the top rated real world C++ (Cpp) examples of LoadInst::replaceAllUsesWith extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

getPointerOperand(30)

getType(30)

setAlignment(22)

eraseFromParent(14)

replaceAllUsesWith(14)

getOperand(13)

getAlignment(12)

getParent(12)

isSimple(10)

setMetadata(9)

getPointerAddressSpace(8)

setAtomic(8)

isVolatile(5)

getMetadata(4)

use_empty(4)

setName(4)

use_begin(3)

getModule(3)

hasOneUse(3)

print(3)

setDebugLoc(2)

getName(2)

setOrdering(2)

dump(2)

getContext(2)

setAAMetadata(2)

getAAMetadata(2)

copyMetadata(1)

takeName(1)

user_begin(1)

setVolatile(1)

isAtomic(1)

setSynchScope(1)

setOperand(1)

getOrdering(1)

getNumUses(1)

getNextNode(1)

getDebugLoc(1)

users(1)

Example #1

Show file

File: PromoteMemoryToRegister.cpp Project: AHelper/llvm-z80-target

/// RewriteSingleStoreAlloca - If there is only a single store to this value,
/// replace any loads of it that are directly dominated by the definition with
/// the value stored.
void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
                                              AllocaInfo &Info,
                                              LargeBlockInfo &LBI) {
  StoreInst *OnlyStore = Info.OnlyStore;
  bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
  BasicBlock *StoreBB = OnlyStore->getParent();
  int StoreIndex = -1;

  // Clear out UsingBlocks.  We will reconstruct it here if needed.
  Info.UsingBlocks.clear();
  
  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) {
    Instruction *UserInst = cast<Instruction>(*UI++);
    if (!isa<LoadInst>(UserInst)) {
      assert(UserInst == OnlyStore && "Should only have load/stores");
      continue;
    }
    LoadInst *LI = cast<LoadInst>(UserInst);
    
    // Okay, if we have a load from the alloca, we want to replace it with the
    // only value stored to the alloca.  We can do this if the value is
    // dominated by the store.  If not, we use the rest of the mem2reg machinery
    // to insert the phi nodes as needed.
    if (!StoringGlobalVal) {  // Non-instructions are always dominated.
      if (LI->getParent() == StoreBB) {
        // If we have a use that is in the same block as the store, compare the
        // indices of the two instructions to see which one came first.  If the
        // load came before the store, we can't handle it.
        if (StoreIndex == -1)
          StoreIndex = LBI.getInstructionIndex(OnlyStore);

        if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
          // Can't handle this load, bail out.
          Info.UsingBlocks.push_back(StoreBB);
          continue;
        }
        
      } else if (LI->getParent() != StoreBB &&
                 !dominates(StoreBB, LI->getParent())) {
        // If the load and store are in different blocks, use BB dominance to
        // check their relationships.  If the store doesn't dom the use, bail
        // out.
        Info.UsingBlocks.push_back(LI->getParent());
        continue;
      }
    }
    
    // Otherwise, we *can* safely rewrite this load.
    Value *ReplVal = OnlyStore->getOperand(0);
    // If the replacement value is the load, this must occur in unreachable
    // code.
    if (ReplVal == LI)
      ReplVal = UndefValue::get(LI->getType());
    LI->replaceAllUsesWith(ReplVal);
    if (AST && LI->getType()->isPointerTy())
      AST->deleteValue(LI);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }
}

Example #2

Show file

File: AMDGPUCodeGenPrepare.cpp Project: bkaradzic/SwiftShader

bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
  if (!WidenLoads)
    return false;

  if ((I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
       I.getPointerAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT) &&
      canWidenScalarExtLoad(I)) {
    IRBuilder<> Builder(&I);
    Builder.SetCurrentDebugLocation(I.getDebugLoc());

    Type *I32Ty = Builder.getInt32Ty();
    Type *PT = PointerType::get(I32Ty, I.getPointerAddressSpace());
    Value *BitCast= Builder.CreateBitCast(I.getPointerOperand(), PT);
    LoadInst *WidenLoad = Builder.CreateLoad(BitCast);
    WidenLoad->copyMetadata(I);

    // If we have range metadata, we need to convert the type, and not make
    // assumptions about the high bits.
    if (auto *Range = WidenLoad->getMetadata(LLVMContext::MD_range)) {
      ConstantInt *Lower =
        mdconst::extract<ConstantInt>(Range->getOperand(0));

      if (Lower->getValue().isNullValue()) {
        WidenLoad->setMetadata(LLVMContext::MD_range, nullptr);
      } else {
        Metadata *LowAndHigh[] = {
          ConstantAsMetadata::get(ConstantInt::get(I32Ty, Lower->getValue().zext(32))),
          // Don't make assumptions about the high bits.
          ConstantAsMetadata::get(ConstantInt::get(I32Ty, 0))
        };

        WidenLoad->setMetadata(LLVMContext::MD_range,
                               MDNode::get(Mod->getContext(), LowAndHigh));
      }
    }

    int TySize = Mod->getDataLayout().getTypeSizeInBits(I.getType());
    Type *IntNTy = Builder.getIntNTy(TySize);
    Value *ValTrunc = Builder.CreateTrunc(WidenLoad, IntNTy);
    Value *ValOrig = Builder.CreateBitCast(ValTrunc, I.getType());
    I.replaceAllUsesWith(ValOrig);
    I.eraseFromParent();
    return true;
  }

  return false;
}

Example #3

Show file

File: PromoteMemoryToRegister.cpp Project: AHelper/llvm-z80-target

/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic
/// block.  If this is the case, avoid traversing the CFG and inserting a lot of
/// potentially useless PHI nodes by just performing a single linear pass over
/// the basic block using the Alloca.
///
/// If we cannot promote this alloca (because it is read before it is written),
/// return true.  This is necessary in cases where, due to control flow, the
/// alloca is potentially undefined on some control flow paths.  e.g. code like
/// this is potentially correct:
///
///   for (...) { if (c) { A = undef; undef = B; } }
///
/// ... so long as A is not used before undef is set.
///
void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
                                              LargeBlockInfo &LBI) {
  // The trickiest case to handle is when we have large blocks. Because of this,
  // this code is optimized assuming that large blocks happen.  This does not
  // significantly pessimize the small block case.  This uses LargeBlockInfo to
  // make it efficient to get the index of various operations in the block.
  
  // Clear out UsingBlocks.  We will reconstruct it here if needed.
  Info.UsingBlocks.clear();
  
  // Walk the use-def list of the alloca, getting the locations of all stores.
  typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy;
  StoresByIndexTy StoresByIndex;
  
  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
       UI != E; ++UI) 
    if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
      StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));

  // If there are no stores to the alloca, just replace any loads with undef.
  if (StoresByIndex.empty()) {
    for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) 
      if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) {
        LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
        if (AST && LI->getType()->isPointerTy())
          AST->deleteValue(LI);
        LBI.deleteValue(LI);
        LI->eraseFromParent();
      }
    return;
  }
  
  // Sort the stores by their index, making it efficient to do a lookup with a
  // binary search.
  std::sort(StoresByIndex.begin(), StoresByIndex.end());
  
  // Walk all of the loads from this alloca, replacing them with the nearest
  // store above them, if any.
  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
    LoadInst *LI = dyn_cast<LoadInst>(*UI++);
    if (!LI) continue;
    
    unsigned LoadIdx = LBI.getInstructionIndex(LI);
    
    // Find the nearest store that has a lower than this load. 
    StoresByIndexTy::iterator I = 
      std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
                       std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)),
                       StoreIndexSearchPredicate());
    
    // If there is no store before this load, then we can't promote this load.
    if (I == StoresByIndex.begin()) {
      // Can't handle this load, bail out.
      Info.UsingBlocks.push_back(LI->getParent());
      continue;
    }
      
    // Otherwise, there was a store before this load, the load takes its value.
    --I;
    LI->replaceAllUsesWith(I->second->getOperand(0));
    if (AST && LI->getType()->isPointerTy())
      AST->deleteValue(LI);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }
}

Example #4

Show file

File: llvmAggregateGlobalOps.cpp Project: AbheekG/chapel

/// tryAggregating - When scanning forward over instructions, we look for
/// other loads or stores that could be aggregated with this one.
/// Returns the last instruction added (if one was added) since we might have
/// removed some loads or stores and that might invalidate an iterator.
Instruction *AggregateGlobalOpsOpt::tryAggregating(Instruction *StartInst, Value *StartPtr,
    bool DebugThis) {
  if (TD == 0) return 0;

  Module* M = StartInst->getParent()->getParent()->getParent();
  LLVMContext& Context = StartInst->getContext();

  Type* int8Ty = Type::getInt8Ty(Context);
  Type* sizeTy = Type::getInt64Ty(Context);
  Type* globalInt8PtrTy = int8Ty->getPointerTo(globalSpace);
  bool isLoad = isa<LoadInst>(StartInst);
  bool isStore = isa<StoreInst>(StartInst);
  Instruction *lastAddedInsn = NULL;
  Instruction *LastLoadOrStore = NULL;
 
  SmallVector<Instruction*, 8> toRemove;

  // Okay, so we now have a single global load/store. Scan to find
  // all subsequent stores of the same value to offset from the same pointer.
  // Join these together into ranges, so we can decide whether contiguous blocks
  // are stored.
  MemOpRanges Ranges(*TD);
 
  // Put the first store in since we want to preserve the order.
  Ranges.addInst(0, StartInst);

  BasicBlock::iterator BI = StartInst;
  for (++BI; !isa<TerminatorInst>(BI); ++BI) {

    if( isGlobalLoadOrStore(BI, globalSpace, isLoad, isStore) ) {
      // OK!
    } else {
      // If the instruction is readnone, ignore it, otherwise bail out.  We
      // don't even allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (BI->mayWriteToMemory())
        break;
      if (isStore && BI->mayReadFromMemory())
        break;
      continue;
    }

    if ( isStore && isa<StoreInst>(BI) ) {
      StoreInst *NextStore = cast<StoreInst>(BI);
      // If this is a store, see if we can merge it in.
      if (!NextStore->isSimple()) break;

      // Check to see if this store is to a constant offset from the start ptr.
      int64_t Offset;
      if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
        break;

      Ranges.addStore(Offset, NextStore);
      LastLoadOrStore = NextStore;
    } else {
      LoadInst *NextLoad = cast<LoadInst>(BI);
      if (!NextLoad->isSimple()) break;

      // Check to see if this load is to a constant offset from the start ptr.
      int64_t Offset;
      if (!IsPointerOffset(StartPtr, NextLoad->getPointerOperand(), Offset, *TD))
        break;

      Ranges.addLoad(Offset, NextLoad);
      LastLoadOrStore = NextLoad;
    }
  }

  // If we have no ranges, then we just had a single store with nothing that
  // could be merged in.  This is a very common case of course.
  if (!Ranges.moreThanOneOp())
    return 0;

  // Divide the instructions between StartInst and LastLoadOrStore into
  // addressing, memops, and uses of memops (uses of loads)
  reorderAddressingMemopsUses(StartInst, LastLoadOrStore, DebugThis);

  Instruction* insertBefore = StartInst;
  IRBuilder<> builder(insertBefore);

  // Now that we have full information about ranges, loop over the ranges and
  // emit memcpy's for anything big enough to be worthwhile.
  for (MemOpRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
       I != E; ++I) {
    const MemOpRange &Range = *I;
    Value* oldBaseI = NULL;
    Value* newBaseI = NULL;

    if (Range.TheStores.size() == 1) continue; // Don't bother if there's only one thing...

    builder.SetInsertPoint(insertBefore);

    // Otherwise, we do want to transform this!  Create a new memcpy.
    // Get the starting pointer of the block.
    StartPtr = Range.StartPtr;

    if( DebugThis ) {
      errs() << "base is:";
      StartPtr->dump();
    }

    // Determine alignment
    unsigned Alignment = Range.Alignment;
    if (Alignment == 0) {
      Type *EltType =
        cast<PointerType>(StartPtr->getType())->getElementType();
      Alignment = TD->getABITypeAlignment(EltType);
    }

    Instruction *alloc = NULL;
    Value *globalPtr = NULL;

    // create temporary alloca space to communicate to/from.
    alloc = makeAlloca(int8Ty, "agg.tmp", insertBefore,
                       Range.End-Range.Start, Alignment);

    // Generate the old and new base pointers before we output
    // anything else.
    {
      Type* iPtrTy = TD->getIntPtrType(alloc->getType());
      Type* iNewBaseTy = TD->getIntPtrType(alloc->getType());
      oldBaseI = builder.CreatePtrToInt(StartPtr, iPtrTy, "agg.tmp.oldb.i");
      newBaseI = builder.CreatePtrToInt(alloc, iNewBaseTy, "agg.tmp.newb.i");
    }

    // If storing, do the stores we had into our alloca'd region.
    if( isStore ) {
      for (SmallVector<Instruction*, 16>::const_iterator
           SI = Range.TheStores.begin(),
           SE = Range.TheStores.end(); SI != SE; ++SI) {
        StoreInst* oldStore = cast<StoreInst>(*SI);

        if( DebugThis ) {
          errs() << "have store in range:";
          oldStore->dump();
        }

        Value* ptrToAlloc = rebasePointer(oldStore->getPointerOperand(),
                                          StartPtr, alloc, "agg.tmp",
                                          &builder, *TD, oldBaseI, newBaseI);
        // Old load must not be volatile or atomic... or we shouldn't have put
        // it in ranges
        assert(!(oldStore->isVolatile() || oldStore->isAtomic()));
        StoreInst* newStore =
          builder.CreateStore(oldStore->getValueOperand(), ptrToAlloc);
        newStore->setAlignment(oldStore->getAlignment());
        newStore->takeName(oldStore);
      }
    }

    // cast the pointer that was load/stored to i8 if necessary.
    if( StartPtr->getType()->getPointerElementType() == int8Ty ) {
      globalPtr = StartPtr;
    } else {
      globalPtr = builder.CreatePointerCast(StartPtr, globalInt8PtrTy, "agg.cast");
    }

    // Get a Constant* for the length.
    Constant* len = ConstantInt::get(sizeTy, Range.End-Range.Start, false);

    // Now add the memcpy instruction
    unsigned addrSpaceDst,addrSpaceSrc;
    addrSpaceDst = addrSpaceSrc = 0;
    if( isStore ) addrSpaceDst = globalSpace;
    if( isLoad ) addrSpaceSrc = globalSpace;

    Type *types[3];
    types[0] = PointerType::get(int8Ty, addrSpaceDst);
    types[1] = PointerType::get(int8Ty, addrSpaceSrc);
    types[2] = sizeTy;

    Function *func = Intrinsic::getDeclaration(M, Intrinsic::memcpy, types);

    Value* args[5]; // dst src len alignment isvolatile
    if( isStore ) {
      // it's a store (ie put)
      args[0] = globalPtr;
      args[1] = alloc;
    } else {
      // it's a load (ie get)
      args[0] = alloc;
      args[1] = globalPtr;
    }
    args[2] = len;
    // alignment
    args[3] = ConstantInt::get(Type::getInt32Ty(Context), 0, false);
    // isvolatile
    args[4] = ConstantInt::get(Type::getInt1Ty(Context), 0, false);

    Instruction* aMemCpy = builder.CreateCall(func, args);

    /*
    DEBUG(dbgs() << "Replace ops:\n";
      for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
        dbgs() << *Range.TheStores[i] << '\n';
      dbgs() << "With: " << *AMemSet << '\n');
      */

    if (!Range.TheStores.empty())
      aMemCpy->setDebugLoc(Range.TheStores[0]->getDebugLoc());

    lastAddedInsn = aMemCpy;

    // If loading, load from the memcpy'd region
    if( isLoad ) {
      for (SmallVector<Instruction*, 16>::const_iterator
           SI = Range.TheStores.begin(),
           SE = Range.TheStores.end(); SI != SE; ++SI) {
        LoadInst* oldLoad = cast<LoadInst>(*SI);
        if( DebugThis ) {
          errs() << "have load in range:";
          oldLoad->dump();
        }

        Value* ptrToAlloc = rebasePointer(oldLoad->getPointerOperand(),
                                          StartPtr, alloc, "agg.tmp",
                                          &builder, *TD, oldBaseI, newBaseI);
        // Old load must not be volatile or atomic... or we shouldn't have put
        // it in ranges
        assert(!(oldLoad->isVolatile() || oldLoad->isAtomic()));
        LoadInst* newLoad = builder.CreateLoad(ptrToAlloc);
        newLoad->setAlignment(oldLoad->getAlignment());
        oldLoad->replaceAllUsesWith(newLoad);
        newLoad->takeName(oldLoad);
        lastAddedInsn = newLoad;
      }
    }

    // Save old loads/stores for removal
    for (SmallVector<Instruction*, 16>::const_iterator
         SI = Range.TheStores.begin(),
         SE = Range.TheStores.end(); SI != SE; ++SI) {
      Instruction* insn = *SI;
      toRemove.push_back(insn);
    }
  }

  // Zap all the old loads/stores
  for (SmallVector<Instruction*, 16>::const_iterator
       SI = toRemove.begin(),
       SE = toRemove.end(); SI != SE; ++SI) {
    (*SI)->eraseFromParent();
  }

  return lastAddedInsn;
}

Example #5

Show file

/// DoPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function.  At this point, we know that it's
/// safe to do so.
CallGraphNode *ArgPromotion::DoPromotion(Function *F,
                               SmallPtrSet<Argument*, 8> &ArgsToPromote,
                              SmallPtrSet<Argument*, 8> &ByValArgsToTransform) {

  // Start by computing a new prototype for the function, which is the same as
  // the old function, but has modified arguments.
  const FunctionType *FTy = F->getFunctionType();
  std::vector<const Type*> Params;

  typedef std::set<IndicesVector> ScalarizeTable;

  // ScalarizedElements - If we are promoting a pointer that has elements
  // accessed out of it, keep track of which elements are accessed so that we
  // can add one argument for each.
  //
  // Arguments that are directly loaded will have a zero element value here, to
  // handle cases where there are both a direct load and GEP accesses.
  //
  std::map<Argument*, ScalarizeTable> ScalarizedElements;

  // OriginalLoads - Keep track of a representative load instruction from the
  // original function so that we can tell the alias analysis implementation
  // what the new GEP/Load instructions we are inserting look like.
  std::map<IndicesVector, LoadInst*> OriginalLoads;

  // Attributes - Keep track of the parameter attributes for the arguments
  // that we are *not* promoting. For the ones that we do promote, the parameter
  // attributes are lost
  SmallVector<AttributeWithIndex, 8> AttributesVec;
  const AttrListPtr &PAL = F->getAttributes();

  // Add any return attributes.
  if (Attributes attrs = PAL.getRetAttributes())
    AttributesVec.push_back(AttributeWithIndex::get(0, attrs));

  // First, determine the new argument list
  unsigned ArgIndex = 1;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
       ++I, ++ArgIndex) {
    if (ByValArgsToTransform.count(I)) {
      // Simple byval argument? Just add all the struct element types.
      const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
      const StructType *STy = cast<StructType>(AgTy);
      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
        Params.push_back(STy->getElementType(i));
      ++NumByValArgsPromoted;
    } else if (!ArgsToPromote.count(I)) {
      // Unchanged argument
      Params.push_back(I->getType());
      if (Attributes attrs = PAL.getParamAttributes(ArgIndex))
        AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs));
    } else if (I->use_empty()) {
      // Dead argument (which are always marked as promotable)
      ++NumArgumentsDead;
    } else {
      // Okay, this is being promoted. This means that the only uses are loads
      // or GEPs which are only used by loads

      // In this table, we will track which indices are loaded from the argument
      // (where direct loads are tracked as no indices).
      ScalarizeTable &ArgIndices = ScalarizedElements[I];
      for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
           ++UI) {
        Instruction *User = cast<Instruction>(*UI);
        assert(isa<LoadInst>(User) || isa<GetElementPtrInst>(User));
        IndicesVector Indices;
        Indices.reserve(User->getNumOperands() - 1);
        // Since loads will only have a single operand, and GEPs only a single
        // non-index operand, this will record direct loads without any indices,
        // and gep+loads with the GEP indices.
        for (User::op_iterator II = User->op_begin() + 1, IE = User->op_end();
             II != IE; ++II)
          Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
        // GEPs with a single 0 index can be merged with direct loads
        if (Indices.size() == 1 && Indices.front() == 0)
          Indices.clear();
        ArgIndices.insert(Indices);
        LoadInst *OrigLoad;
        if (LoadInst *L = dyn_cast<LoadInst>(User))
          OrigLoad = L;
        else
          // Take any load, we will use it only to update Alias Analysis
          OrigLoad = cast<LoadInst>(User->use_back());
        OriginalLoads[Indices] = OrigLoad;
      }

      // Add a parameter to the function for each element passed in.
      for (ScalarizeTable::iterator SI = ArgIndices.begin(),
             E = ArgIndices.end(); SI != E; ++SI) {
        // not allowed to dereference ->begin() if size() is 0
        Params.push_back(GetElementPtrInst::getIndexedType(I->getType(),
                                                           SI->begin(),
                                                           SI->end()));
        assert(Params.back());
      }

      if (ArgIndices.size() == 1 && ArgIndices.begin()->empty())
        ++NumArgumentsPromoted;
      else
        ++NumAggregatesPromoted;
    }
  }

  // Add any function attributes.
  if (Attributes attrs = PAL.getFnAttributes())
    AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));

  const Type *RetTy = FTy->getReturnType();

  // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which
  // have zero fixed arguments.
  bool ExtraArgHack = false;
  if (Params.empty() && FTy->isVarArg()) {
    ExtraArgHack = true;
    Params.push_back(Type::getInt32Ty(F->getContext()));
  }

  // Construct the new function type using the new arguments.
  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());

  // Create the new function body and insert it into the module.
  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
  NF->copyAttributesFrom(F);

  
  DEBUG(dbgs() << "ARG PROMOTION:  Promoting to:" << *NF << "\n"
        << "From: " << *F);
  
  // Recompute the parameter attributes list based on the new arguments for
  // the function.
  NF->setAttributes(AttrListPtr::get(AttributesVec.begin(),
                                     AttributesVec.end()));
  AttributesVec.clear();

  F->getParent()->getFunctionList().insert(F, NF);
  NF->takeName(F);

  // Get the alias analysis information that we need to update to reflect our
  // changes.
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();

  // Get the callgraph information that we need to update to reflect our
  // changes.
  CallGraph &CG = getAnalysis<CallGraph>();
  
  // Get a new callgraph node for NF.
  CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
  

  // Loop over all of the callers of the function, transforming the call sites
  // to pass in the loaded pointers.
  //
  SmallVector<Value*, 16> Args;
  while (!F->use_empty()) {
    CallSite CS = CallSite::get(F->use_back());
    assert(CS.getCalledFunction() == F);
    Instruction *Call = CS.getInstruction();
    const AttrListPtr &CallPAL = CS.getAttributes();

    // Add any return attributes.
    if (Attributes attrs = CallPAL.getRetAttributes())
      AttributesVec.push_back(AttributeWithIndex::get(0, attrs));

    // Loop over the operands, inserting GEP and loads in the caller as
    // appropriate.
    CallSite::arg_iterator AI = CS.arg_begin();
    ArgIndex = 1;
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
         I != E; ++I, ++AI, ++ArgIndex)
      if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
        Args.push_back(*AI);          // Unmodified argument

        if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
          AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));

      } else if (ByValArgsToTransform.count(I)) {
        // Emit a GEP and load for each element of the struct.
        const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
        const StructType *STy = cast<StructType>(AgTy);
        Value *Idxs[2] = {
              ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
          Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
          Value *Idx = GetElementPtrInst::Create(*AI, Idxs, Idxs+2,
                                                 (*AI)->getName()+"."+utostr(i),
                                                 Call);
          // TODO: Tell AA about the new values?
          Args.push_back(new LoadInst(Idx, Idx->getName()+".val", Call));
        }
      } else if (!I->use_empty()) {
        // Non-dead argument: insert GEPs and loads as appropriate.
        ScalarizeTable &ArgIndices = ScalarizedElements[I];
        // Store the Value* version of the indices in here, but declare it now
        // for reuse.
        std::vector<Value*> Ops;
        for (ScalarizeTable::iterator SI = ArgIndices.begin(),
               E = ArgIndices.end(); SI != E; ++SI) {
          Value *V = *AI;
          LoadInst *OrigLoad = OriginalLoads[*SI];
          if (!SI->empty()) {
            Ops.reserve(SI->size());
            const Type *ElTy = V->getType();
            for (IndicesVector::const_iterator II = SI->begin(),
                 IE = SI->end(); II != IE; ++II) {
              // Use i32 to index structs, and i64 for others (pointers/arrays).
              // This satisfies GEP constraints.
              const Type *IdxTy = (ElTy->isStructTy() ?
                    Type::getInt32Ty(F->getContext()) : 
                    Type::getInt64Ty(F->getContext()));
              Ops.push_back(ConstantInt::get(IdxTy, *II));
              // Keep track of the type we're currently indexing.
              ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
            }
            // And create a GEP to extract those indices.
            V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(),
                                          V->getName()+".idx", Call);
            Ops.clear();
            AA.copyValue(OrigLoad->getOperand(0), V);
          }
          // Since we're replacing a load make sure we take the alignment
          // of the previous load.
          LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
          newLoad->setAlignment(OrigLoad->getAlignment());
          Args.push_back(newLoad);
          AA.copyValue(OrigLoad, Args.back());
        }
      }

    if (ExtraArgHack)
      Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext())));

    // Push any varargs arguments on the list.
    for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
      Args.push_back(*AI);
      if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
        AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
    }

    // Add any function attributes.
    if (Attributes attrs = CallPAL.getFnAttributes())
      AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));

    Instruction *New;
    if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
      New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                               Args.begin(), Args.end(), "", Call);
      cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
      cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
                                                          AttributesVec.end()));
    } else {
      New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
      cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
      cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
                                                        AttributesVec.end()));
      if (cast<CallInst>(Call)->isTailCall())
        cast<CallInst>(New)->setTailCall();
    }
    Args.clear();
    AttributesVec.clear();

    // Update the alias analysis implementation to know that we are replacing
    // the old call with a new one.
    AA.replaceWithNewValue(Call, New);

    // Update the callgraph to know that the callsite has been transformed.
    CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
    CalleeNode->replaceCallEdge(Call, New, NF_CGN);

    if (!Call->use_empty()) {
      Call->replaceAllUsesWith(New);
      New->takeName(Call);
    }

    // Finally, remove the old call from the program, reducing the use-count of
    // F.
    Call->eraseFromParent();
  }

  // Since we have now created the new function, splice the body of the old
  // function right into the new function, leaving the old rotting hulk of the
  // function empty.
  NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());

  // Loop over the argument list, transfering uses of the old arguments over to
  // the new arguments, also transfering over the names as well.
  //
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
       I2 = NF->arg_begin(); I != E; ++I) {
    if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
      // If this is an unmodified argument, move the name and users over to the
      // new version.
      I->replaceAllUsesWith(I2);
      I2->takeName(I);
      AA.replaceWithNewValue(I, I2);
      ++I2;
      continue;
    }

    if (ByValArgsToTransform.count(I)) {
      // In the callee, we create an alloca, and store each of the new incoming
      // arguments into the alloca.
      Instruction *InsertPt = NF->begin()->begin();

      // Just add all the struct element types.
      const Type *AgTy = cast<PointerType>(I->getType())->getElementType();
      Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt);
      const StructType *STy = cast<StructType>(AgTy);
      Value *Idxs[2] = {
            ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };

      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
        Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
        Value *Idx = 
          GetElementPtrInst::Create(TheAlloca, Idxs, Idxs+2,
                                    TheAlloca->getName()+"."+Twine(i), 
                                    InsertPt);
        I2->setName(I->getName()+"."+Twine(i));
        new StoreInst(I2++, Idx, InsertPt);
      }

      // Anything that used the arg should now use the alloca.
      I->replaceAllUsesWith(TheAlloca);
      TheAlloca->takeName(I);
      AA.replaceWithNewValue(I, TheAlloca);
      continue;
    }

    if (I->use_empty()) {
      AA.deleteValue(I);
      continue;
    }

    // Otherwise, if we promoted this argument, then all users are load
    // instructions (or GEPs with only load users), and all loads should be
    // using the new argument that we added.
    ScalarizeTable &ArgIndices = ScalarizedElements[I];

    while (!I->use_empty()) {
      if (LoadInst *LI = dyn_cast<LoadInst>(I->use_back())) {
        assert(ArgIndices.begin()->empty() &&
               "Load element should sort to front!");
        I2->setName(I->getName()+".val");
        LI->replaceAllUsesWith(I2);
        AA.replaceWithNewValue(LI, I2);
        LI->eraseFromParent();
        DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
              << "' in function '" << F->getName() << "'\n");
      } else {
        GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->use_back());
        IndicesVector Operands;
        Operands.reserve(GEP->getNumIndices());
        for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
             II != IE; ++II)
          Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());

        // GEPs with a single 0 index can be merged with direct loads
        if (Operands.size() == 1 && Operands.front() == 0)
          Operands.clear();

        Function::arg_iterator TheArg = I2;
        for (ScalarizeTable::iterator It = ArgIndices.begin();
             *It != Operands; ++It, ++TheArg) {
          assert(It != ArgIndices.end() && "GEP not handled??");
        }

        std::string NewName = I->getName();
        for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
            NewName += "." + utostr(Operands[i]);
        }
        NewName += ".val";
        TheArg->setName(NewName);

        DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
              << "' of function '" << NF->getName() << "'\n");

        // All of the uses must be load instructions.  Replace them all with
        // the argument specified by ArgNo.
        while (!GEP->use_empty()) {
          LoadInst *L = cast<LoadInst>(GEP->use_back());
          L->replaceAllUsesWith(TheArg);
          AA.replaceWithNewValue(L, TheArg);
          L->eraseFromParent();
        }
        AA.deleteValue(GEP);
        GEP->eraseFromParent();
      }
    }

    // Increment I2 past all of the arguments added for this promoted pointer.
    for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i)
      ++I2;
  }

  // Notify the alias analysis implementation that we inserted a new argument.
  if (ExtraArgHack)
    AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())), 
                 NF->arg_begin());


  // Tell the alias analysis that the old function is about to disappear.
  AA.replaceWithNewValue(F, NF);

  
  NF_CGN->stealCalledFunctionsFrom(CG[F]);
  
  // Now that the old function is dead, delete it.  If there is a dangling
  // reference to the CallgraphNode, just leave the dead function around for
  // someone else to nuke.
  CallGraphNode *CGN = CG[F];
  if (CGN->getNumReferences() == 0)
    delete CG.removeFunctionFromModule(CGN);
  else
    F->setLinkage(Function::ExternalLinkage);
  
  return NF_CGN;
}

Example #6

Show file

File: PromoteMemoryToRegister.cpp Project: xatier/llvm

/// Many allocas are only used within a single basic block.  If this is the
/// case, avoid traversing the CFG and inserting a lot of potentially useless
/// PHI nodes by just performing a single linear pass over the basic block
/// using the Alloca.
///
/// If we cannot promote this alloca (because it is read before it is written),
/// return true.  This is necessary in cases where, due to control flow, the
/// alloca is potentially undefined on some control flow paths.  e.g. code like
/// this is potentially correct:
///
///   for (...) { if (c) { A = undef; undef = B; } }
///
/// ... so long as A is not used before undef is set.
static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
                                     LargeBlockInfo &LBI,
                                     AliasSetTracker *AST) {
  // The trickiest case to handle is when we have large blocks. Because of this,
  // this code is optimized assuming that large blocks happen.  This does not
  // significantly pessimize the small block case.  This uses LargeBlockInfo to
  // make it efficient to get the index of various operations in the block.

  // Walk the use-def list of the alloca, getting the locations of all stores.
  typedef SmallVector<std::pair<unsigned, StoreInst *>, 64> StoresByIndexTy;
  StoresByIndexTy StoresByIndex;

  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;
       ++UI)
    if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
      StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));

  // Sort the stores by their index, making it efficient to do a lookup with a
  // binary search.
  std::sort(StoresByIndex.begin(), StoresByIndex.end(),
            StoreIndexSearchPredicate());

  // Walk all of the loads from this alloca, replacing them with the nearest
  // store above them, if any.
  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
    LoadInst *LI = dyn_cast<LoadInst>(*UI++);
    if (!LI)
      continue;

    unsigned LoadIdx = LBI.getInstructionIndex(LI);

    // Find the nearest store that has a lower index than this load.
    StoresByIndexTy::iterator I =
        std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
                         std::make_pair(LoadIdx, static_cast<StoreInst *>(0)),
                         StoreIndexSearchPredicate());

    if (I == StoresByIndex.begin())
      // If there is no store before this load, the load takes the undef value.
      LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
    else
      // Otherwise, there was a store before this load, the load takes its value.
      LI->replaceAllUsesWith(llvm::prior(I)->second->getOperand(0));

    if (AST && LI->getType()->isPointerTy())
      AST->deleteValue(LI);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }

  // Remove the (now dead) stores and alloca.
  while (!AI->use_empty()) {
    StoreInst *SI = cast<StoreInst>(AI->use_back());
    // Record debuginfo for the store before removing it.
    if (DbgDeclareInst *DDI = Info.DbgDeclare) {
      DIBuilder DIB(*AI->getParent()->getParent()->getParent());
      ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
    }
    SI->eraseFromParent();
    LBI.deleteValue(SI);
  }

  if (AST)
    AST->deleteValue(AI);
  AI->eraseFromParent();
  LBI.deleteValue(AI);

  // The alloca's debuginfo can be removed as well.
  if (DbgDeclareInst *DDI = Info.DbgDeclare)
    DDI->eraseFromParent();

  ++NumLocalPromoted;
}

Example #7

Show file

File: PromoteMemoryToRegister.cpp Project: xatier/llvm

/// \brief Rewrite as many loads as possible given a single store.
///
/// When there is only a single store, we can use the domtree to trivially
/// replace all of the dominated loads with the stored value. Do so, and return
/// true if this has successfully promoted the alloca entirely. If this returns
/// false there were some loads which were not dominated by the single store
/// and thus must be phi-ed with undef. We fall back to the standard alloca
/// promotion algorithm in that case.
static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
                                     LargeBlockInfo &LBI,
                                     DominatorTree &DT,
                                     AliasSetTracker *AST) {
  StoreInst *OnlyStore = Info.OnlyStore;
  bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
  BasicBlock *StoreBB = OnlyStore->getParent();
  int StoreIndex = -1;

  // Clear out UsingBlocks.  We will reconstruct it here if needed.
  Info.UsingBlocks.clear();

  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) {
    Instruction *UserInst = cast<Instruction>(*UI++);
    if (!isa<LoadInst>(UserInst)) {
      assert(UserInst == OnlyStore && "Should only have load/stores");
      continue;
    }
    LoadInst *LI = cast<LoadInst>(UserInst);

    // Okay, if we have a load from the alloca, we want to replace it with the
    // only value stored to the alloca.  We can do this if the value is
    // dominated by the store.  If not, we use the rest of the mem2reg machinery
    // to insert the phi nodes as needed.
    if (!StoringGlobalVal) { // Non-instructions are always dominated.
      if (LI->getParent() == StoreBB) {
        // If we have a use that is in the same block as the store, compare the
        // indices of the two instructions to see which one came first.  If the
        // load came before the store, we can't handle it.
        if (StoreIndex == -1)
          StoreIndex = LBI.getInstructionIndex(OnlyStore);

        if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
          // Can't handle this load, bail out.
          Info.UsingBlocks.push_back(StoreBB);
          continue;
        }

      } else if (LI->getParent() != StoreBB &&
                 !DT.dominates(StoreBB, LI->getParent())) {
        // If the load and store are in different blocks, use BB dominance to
        // check their relationships.  If the store doesn't dom the use, bail
        // out.
        Info.UsingBlocks.push_back(LI->getParent());
        continue;
      }
    }

    // Otherwise, we *can* safely rewrite this load.
    Value *ReplVal = OnlyStore->getOperand(0);
    // If the replacement value is the load, this must occur in unreachable
    // code.
    if (ReplVal == LI)
      ReplVal = UndefValue::get(LI->getType());
    LI->replaceAllUsesWith(ReplVal);
    if (AST && LI->getType()->isPointerTy())
      AST->deleteValue(LI);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }

  // Finally, after the scan, check to see if the store is all that is left.
  if (!Info.UsingBlocks.empty())
    return false; // If not, we'll have to fall back for the remainder.

  // Record debuginfo for the store and remove the declaration's
  // debuginfo.
  if (DbgDeclareInst *DDI = Info.DbgDeclare) {
    DIBuilder DIB(*AI->getParent()->getParent()->getParent());
    ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB);
    DDI->eraseFromParent();
  }
  // Remove the (now dead) store and alloca.
  Info.OnlyStore->eraseFromParent();
  LBI.deleteValue(Info.OnlyStore);

  if (AST)
    AST->deleteValue(AI);
  AI->eraseFromParent();
  LBI.deleteValue(AI);
  return true;
}

Example #8

Show file

File: StructRetPromotion.cpp Project: dgohman/llvm-mirror

/// updateCallSites - Update all sites that call F to use NF.
CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) {
  CallGraph &CG = getAnalysis<CallGraph>();
  SmallVector<Value*, 16> Args;

  // Attributes - Keep track of the parameter attributes for the arguments.
  SmallVector<AttributeWithIndex, 8> ArgAttrsVec;

  // Get a new callgraph node for NF.
  CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);

  while (!F->use_empty()) {
    CallSite CS(*F->use_begin());
    Instruction *Call = CS.getInstruction();

    const AttrListPtr &PAL = F->getAttributes();
    // Add any return attributes.
    if (Attributes attrs = PAL.getRetAttributes())
      ArgAttrsVec.push_back(AttributeWithIndex::get(0, attrs));

    // Copy arguments, however skip first one.
    CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
    Value *FirstCArg = *AI;
    ++AI;
    // 0th parameter attribute is reserved for return type.
    // 1th parameter attribute is for first 1st sret argument.
    unsigned ParamIndex = 2; 
    while (AI != AE) {
      Args.push_back(*AI); 
      if (Attributes Attrs = PAL.getParamAttributes(ParamIndex))
        ArgAttrsVec.push_back(AttributeWithIndex::get(ParamIndex - 1, Attrs));
      ++ParamIndex;
      ++AI;
    }

    // Add any function attributes.
    if (Attributes attrs = PAL.getFnAttributes())
      ArgAttrsVec.push_back(AttributeWithIndex::get(~0, attrs));
    
    AttrListPtr NewPAL = AttrListPtr::get(ArgAttrsVec.begin(), ArgAttrsVec.end());
    
    // Build new call instruction.
    Instruction *New;
    if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
      New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                               Args.begin(), Args.end(), "", Call);
      cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
      cast<InvokeInst>(New)->setAttributes(NewPAL);
    } else {
      New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
      cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
      cast<CallInst>(New)->setAttributes(NewPAL);
      if (cast<CallInst>(Call)->isTailCall())
        cast<CallInst>(New)->setTailCall();
    }
    Args.clear();
    ArgAttrsVec.clear();
    New->takeName(Call);

    // Update the callgraph to know that the callsite has been transformed.
    CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()];
    CalleeNode->removeCallEdgeFor(Call);
    CalleeNode->addCalledFunction(New, NF_CGN);
    
    // Update all users of sret parameter to extract value using extractvalue.
    for (Value::use_iterator UI = FirstCArg->use_begin(), 
           UE = FirstCArg->use_end(); UI != UE; ) {
      User *U2 = *UI++;
      CallInst *C2 = dyn_cast<CallInst>(U2);
      if (C2 && (C2 == Call))
        continue;
      
      GetElementPtrInst *UGEP = cast<GetElementPtrInst>(U2);
      ConstantInt *Idx = cast<ConstantInt>(UGEP->getOperand(2));
      Value *GR = ExtractValueInst::Create(New, Idx->getZExtValue(),
                                           "evi", UGEP);
      while(!UGEP->use_empty()) {
        // isSafeToUpdateAllCallers has checked that all GEP uses are
        // LoadInsts
        LoadInst *L = cast<LoadInst>(*UGEP->use_begin());
        L->replaceAllUsesWith(GR);
        L->eraseFromParent();
      }
      UGEP->eraseFromParent();
      continue;
    }
    Call->eraseFromParent();
  }
  
  return NF_CGN;
}

Example #9

Show file

void LoadAndStorePromoter::
run(const SmallVectorImpl<Instruction*> &Insts) const {
  
  // First step: bucket up uses of the alloca by the block they occur in.
  // This is important because we have to handle multiple defs/uses in a block
  // ourselves: SSAUpdater is purely for cross-block references.
  // FIXME: Want a TinyVector<Instruction*> since there is often 0/1 element.
  DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
  
  for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
    Instruction *User = Insts[i];
    UsesByBlock[User->getParent()].push_back(User);
  }
  
  // Okay, now we can iterate over all the blocks in the function with uses,
  // processing them.  Keep track of which loads are loading a live-in value.
  // Walk the uses in the use-list order to be determinstic.
  SmallVector<LoadInst*, 32> LiveInLoads;
  DenseMap<Value*, Value*> ReplacedLoads;
  
  for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
    Instruction *User = Insts[i];
    BasicBlock *BB = User->getParent();
    std::vector<Instruction*> &BlockUses = UsesByBlock[BB];
    
    // If this block has already been processed, ignore this repeat use.
    if (BlockUses.empty()) continue;
    
    // Okay, this is the first use in the block.  If this block just has a
    // single user in it, we can rewrite it trivially.
    if (BlockUses.size() == 1) {
      // If it is a store, it is a trivial def of the value in the block.
      if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
        updateDebugInfo(SI);
        SSA.AddAvailableValue(BB, SI->getOperand(0));
      } else 
        // Otherwise it is a load, queue it to rewrite as a live-in load.
        LiveInLoads.push_back(cast<LoadInst>(User));
      BlockUses.clear();
      continue;
    }
    
    // Otherwise, check to see if this block is all loads.
    bool HasStore = false;
    for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
      if (isa<StoreInst>(BlockUses[i])) {
        HasStore = true;
        break;
      }
    }
    
    // If so, we can queue them all as live in loads.  We don't have an
    // efficient way to tell which on is first in the block and don't want to
    // scan large blocks, so just add all loads as live ins.
    if (!HasStore) {
      for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
        LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
      BlockUses.clear();
      continue;
    }
    
    // Otherwise, we have mixed loads and stores (or just a bunch of stores).
    // Since SSAUpdater is purely for cross-block values, we need to determine
    // the order of these instructions in the block.  If the first use in the
    // block is a load, then it uses the live in value.  The last store defines
    // the live out value.  We handle this by doing a linear scan of the block.
    Value *StoredValue = 0;
    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
      if (LoadInst *L = dyn_cast<LoadInst>(II)) {
        // If this is a load from an unrelated pointer, ignore it.
        if (!isInstInList(L, Insts)) continue;
        
        // If we haven't seen a store yet, this is a live in use, otherwise
        // use the stored value.
        if (StoredValue) {
          replaceLoadWithValue(L, StoredValue);
          L->replaceAllUsesWith(StoredValue);
          ReplacedLoads[L] = StoredValue;
        } else {
          LiveInLoads.push_back(L);
        }
        continue;
      }
      
      if (StoreInst *SI = dyn_cast<StoreInst>(II)) {
        // If this is a store to an unrelated pointer, ignore it.
        if (!isInstInList(SI, Insts)) continue;
        updateDebugInfo(SI);

        // Remember that this is the active value in the block.
        StoredValue = SI->getOperand(0);
      }
    }
    
    // The last stored value that happened is the live-out for the block.
    assert(StoredValue && "Already checked that there is a store in block");
    SSA.AddAvailableValue(BB, StoredValue);
    BlockUses.clear();
  }
  
  // Okay, now we rewrite all loads that use live-in values in the loop,
  // inserting PHI nodes as necessary.
  for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
    LoadInst *ALoad = LiveInLoads[i];
    Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
    replaceLoadWithValue(ALoad, NewVal);

    // Avoid assertions in unreachable code.
    if (NewVal == ALoad) NewVal = UndefValue::get(NewVal->getType());
    ALoad->replaceAllUsesWith(NewVal);
    ReplacedLoads[ALoad] = NewVal;
  }
  
  // Allow the client to do stuff before we start nuking things.
  doExtraRewritesBeforeFinalDeletion();
  
  // Now that everything is rewritten, delete the old instructions from the
  // function.  They should all be dead now.
  for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
    Instruction *User = Insts[i];
    
    // If this is a load that still has uses, then the load must have been added
    // as a live value in the SSAUpdate data structure for a block (e.g. because
    // the loaded value was stored later).  In this case, we need to recursively
    // propagate the updates until we get to the real value.
    if (!User->use_empty()) {
      Value *NewVal = ReplacedLoads[User];
      assert(NewVal && "not a replaced load?");
      
      // Propagate down to the ultimate replacee.  The intermediately loads
      // could theoretically already have been deleted, so we don't want to
      // dereference the Value*'s.
      DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
      while (RLI != ReplacedLoads.end()) {
        NewVal = RLI->second;
        RLI = ReplacedLoads.find(NewVal);
      }
      
      replaceLoadWithValue(cast<LoadInst>(User), NewVal);
      User->replaceAllUsesWith(NewVal);
    }
    
    instructionDeleted(User);
    User->eraseFromParent();
  }
}

Example #10

Show file

File: PromoteMemoryToRegister.cpp Project: jamboree/llvm

/// Many allocas are only used within a single basic block.  If this is the
/// case, avoid traversing the CFG and inserting a lot of potentially useless
/// PHI nodes by just performing a single linear pass over the basic block
/// using the Alloca.
///
/// If we cannot promote this alloca (because it is read before it is written),
/// return false.  This is necessary in cases where, due to control flow, the
/// alloca is undefined only on some control flow paths.  e.g. code like
/// this is correct in LLVM IR:
///  // A is an alloca with no stores so far
///  for (...) {
///    int t = *A;
///    if (!first_iteration)
///      use(t);
///    *A = 42;
///  }
static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
                                     LargeBlockInfo &LBI,
                                     const DataLayout &DL,
                                     DominatorTree &DT,
                                     AssumptionCache *AC) {
  // The trickiest case to handle is when we have large blocks. Because of this,
  // this code is optimized assuming that large blocks happen.  This does not
  // significantly pessimize the small block case.  This uses LargeBlockInfo to
  // make it efficient to get the index of various operations in the block.

  // Walk the use-def list of the alloca, getting the locations of all stores.
  using StoresByIndexTy = SmallVector<std::pair<unsigned, StoreInst *>, 64>;
  StoresByIndexTy StoresByIndex;

  for (User *U : AI->users())
    if (StoreInst *SI = dyn_cast<StoreInst>(U))
      StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));

  // Sort the stores by their index, making it efficient to do a lookup with a
  // binary search.
  llvm::sort(StoresByIndex, less_first());

  // Walk all of the loads from this alloca, replacing them with the nearest
  // store above them, if any.
  for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
    LoadInst *LI = dyn_cast<LoadInst>(*UI++);
    if (!LI)
      continue;

    unsigned LoadIdx = LBI.getInstructionIndex(LI);

    // Find the nearest store that has a lower index than this load.
    StoresByIndexTy::iterator I =
        std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
                         std::make_pair(LoadIdx,
                                        static_cast<StoreInst *>(nullptr)),
                         less_first());
    if (I == StoresByIndex.begin()) {
      if (StoresByIndex.empty())
        // If there are no stores, the load takes the undef value.
        LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
      else
        // There is no store before this load, bail out (load may be affected
        // by the following stores - see main comment).
        return false;
    } else {
      // Otherwise, there was a store before this load, the load takes its value.
      // Note, if the load was marked as nonnull we don't want to lose that
      // information when we erase it. So we preserve it with an assume.
      Value *ReplVal = std::prev(I)->second->getOperand(0);
      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
          !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))
        addAssumeNonNull(AC, LI);

      // If the replacement value is the load, this must occur in unreachable
      // code.
      if (ReplVal == LI)
        ReplVal = UndefValue::get(LI->getType());

      LI->replaceAllUsesWith(ReplVal);
    }

    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }

  // Remove the (now dead) stores and alloca.
  while (!AI->use_empty()) {
    StoreInst *SI = cast<StoreInst>(AI->user_back());
    // Record debuginfo for the store before removing it.
    for (DbgVariableIntrinsic *DII : Info.DbgDeclares) {
      DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
      ConvertDebugDeclareToDebugValue(DII, SI, DIB);
    }
    SI->eraseFromParent();
    LBI.deleteValue(SI);
  }

  AI->eraseFromParent();
  LBI.deleteValue(AI);

  // The alloca's debuginfo can be removed as well.
  for (DbgVariableIntrinsic *DII : Info.DbgDeclares) {
    DII->eraseFromParent();
    LBI.deleteValue(DII);
  }

  ++NumLocalPromoted;
  return true;
}

Example #11

Show file

File: PromoteMemoryToRegister.cpp Project: jamboree/llvm

/// Rewrite as many loads as possible given a single store.
///
/// When there is only a single store, we can use the domtree to trivially
/// replace all of the dominated loads with the stored value. Do so, and return
/// true if this has successfully promoted the alloca entirely. If this returns
/// false there were some loads which were not dominated by the single store
/// and thus must be phi-ed with undef. We fall back to the standard alloca
/// promotion algorithm in that case.
static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
                                     LargeBlockInfo &LBI, const DataLayout &DL,
                                     DominatorTree &DT, AssumptionCache *AC) {
  StoreInst *OnlyStore = Info.OnlyStore;
  bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
  BasicBlock *StoreBB = OnlyStore->getParent();
  int StoreIndex = -1;

  // Clear out UsingBlocks.  We will reconstruct it here if needed.
  Info.UsingBlocks.clear();

  for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) {
    Instruction *UserInst = cast<Instruction>(*UI++);
    if (!isa<LoadInst>(UserInst)) {
      assert(UserInst == OnlyStore && "Should only have load/stores");
      continue;
    }
    LoadInst *LI = cast<LoadInst>(UserInst);

    // Okay, if we have a load from the alloca, we want to replace it with the
    // only value stored to the alloca.  We can do this if the value is
    // dominated by the store.  If not, we use the rest of the mem2reg machinery
    // to insert the phi nodes as needed.
    if (!StoringGlobalVal) { // Non-instructions are always dominated.
      if (LI->getParent() == StoreBB) {
        // If we have a use that is in the same block as the store, compare the
        // indices of the two instructions to see which one came first.  If the
        // load came before the store, we can't handle it.
        if (StoreIndex == -1)
          StoreIndex = LBI.getInstructionIndex(OnlyStore);

        if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
          // Can't handle this load, bail out.
          Info.UsingBlocks.push_back(StoreBB);
          continue;
        }
      } else if (LI->getParent() != StoreBB &&
                 !DT.dominates(StoreBB, LI->getParent())) {
        // If the load and store are in different blocks, use BB dominance to
        // check their relationships.  If the store doesn't dom the use, bail
        // out.
        Info.UsingBlocks.push_back(LI->getParent());
        continue;
      }
    }

    // Otherwise, we *can* safely rewrite this load.
    Value *ReplVal = OnlyStore->getOperand(0);
    // If the replacement value is the load, this must occur in unreachable
    // code.
    if (ReplVal == LI)
      ReplVal = UndefValue::get(LI->getType());

    // If the load was marked as nonnull we don't want to lose
    // that information when we erase this Load. So we preserve
    // it with an assume.
    if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
        !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))
      addAssumeNonNull(AC, LI);

    LI->replaceAllUsesWith(ReplVal);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }

  // Finally, after the scan, check to see if the store is all that is left.
  if (!Info.UsingBlocks.empty())
    return false; // If not, we'll have to fall back for the remainder.

  // Record debuginfo for the store and remove the declaration's
  // debuginfo.
  for (DbgVariableIntrinsic *DII : Info.DbgDeclares) {
    DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
    ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB);
    DII->eraseFromParent();
    LBI.deleteValue(DII);
  }
  // Remove the (now dead) store and alloca.
  Info.OnlyStore->eraseFromParent();
  LBI.deleteValue(Info.OnlyStore);

  AI->eraseFromParent();
  LBI.deleteValue(AI);
  return true;
}

Example #12

Show file

File: LICM.cpp Project: CPFL/guc

/// PromoteAliasSet - Try to promote memory values to scalars by sinking
/// stores out of the loop and moving loads to before the loop.  We do this by
/// looping over the stores in the loop, looking for stores to Must pointers
/// which are loop invariant.
///
void LICM::PromoteAliasSet(AliasSet &AS) {
  // We can promote this alias set if it has a store, if it is a "Must" alias
  // set, if the pointer is loop invariant, and if we are not eliminating any
  // volatile loads or stores.
  if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
      AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
    return;
  
  assert(!AS.empty() &&
         "Must alias set should have at least one pointer element in it!");
  Value *SomePtr = AS.begin()->getValue();

  // It isn't safe to promote a load/store from the loop if the load/store is
  // conditional.  For example, turning:
  //
  //    for () { if (c) *P += 1; }
  //
  // into:
  //
  //    tmp = *P;  for () { if (c) tmp +=1; } *P = tmp;
  //
  // is not safe, because *P may only be valid to access if 'c' is true.
  // 
  // It is safe to promote P if all uses are direct load/stores and if at
  // least one is guaranteed to be executed.
  bool GuaranteedToExecute = false;
  
  SmallVector<Instruction*, 64> LoopUses;
  SmallPtrSet<Value*, 4> PointerMustAliases;

  // Check that all of the pointers in the alias set have the same type.  We
  // cannot (yet) promote a memory location that is loaded and stored in
  // different sizes.
  for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
    Value *ASIV = ASI->getValue();
    PointerMustAliases.insert(ASIV);
    
    // Check that all of the pointers in the alias set have the same type.  We
    // cannot (yet) promote a memory location that is loaded and stored in
    // different sizes.
    if (SomePtr->getType() != ASIV->getType())
      return;
    
    for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
         UI != UE; ++UI) {
      // Ignore instructions that are outside the loop.
      Instruction *Use = dyn_cast<Instruction>(*UI);
      if (!Use || !CurLoop->contains(Use))
        continue;
      
      // If there is an non-load/store instruction in the loop, we can't promote
      // it.
      if (isa<LoadInst>(Use))
        assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
      else if (isa<StoreInst>(Use)) {
        assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
        if (Use->getOperand(0) == ASIV) return;
      } else
        return; // Not a load or store.
      
      if (!GuaranteedToExecute)
        GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
      
      LoopUses.push_back(Use);
    }
  }
  
  // If there isn't a guaranteed-to-execute instruction, we can't promote.
  if (!GuaranteedToExecute)
    return;
  
  // Otherwise, this is safe to promote, lets do it!
  DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');  
  Changed = true;
  ++NumPromoted;

  // We use the SSAUpdater interface to insert phi nodes as required.
  SmallVector<PHINode*, 16> NewPHIs;
  SSAUpdater SSA(&NewPHIs);
  
  // It wants to know some value of the same type as what we'll be inserting.
  Value *SomeValue;
  if (isa<LoadInst>(LoopUses[0]))
    SomeValue = LoopUses[0];
  else
    SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0);
  SSA.Initialize(SomeValue->getType(), SomeValue->getName());

  // First step: bucket up uses of the pointers by the block they occur in.
  // This is important because we have to handle multiple defs/uses in a block
  // ourselves: SSAUpdater is purely for cross-block references.
  // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element.
  DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
  for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
    Instruction *User = LoopUses[i];
    UsesByBlock[User->getParent()].push_back(User);
  }
  
  // Okay, now we can iterate over all the blocks in the loop with uses,
  // processing them.  Keep track of which loads are loading a live-in value.
  SmallVector<LoadInst*, 32> LiveInLoads;
  DenseMap<Value*, Value*> ReplacedLoads;
  
  for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) {
    Instruction *User = LoopUses[LoopUse];
    std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()];
    
    // If this block has already been processed, ignore this repeat use.
    if (BlockUses.empty()) continue;
    
    // Okay, this is the first use in the block.  If this block just has a
    // single user in it, we can rewrite it trivially.
    if (BlockUses.size() == 1) {
      // If it is a store, it is a trivial def of the value in the block.
      if (isa<StoreInst>(User)) {
        SSA.AddAvailableValue(User->getParent(),
                              cast<StoreInst>(User)->getOperand(0));
      } else {
        // Otherwise it is a load, queue it to rewrite as a live-in load.
        LiveInLoads.push_back(cast<LoadInst>(User));
      }
      BlockUses.clear();
      continue;
    }
    
    // Otherwise, check to see if this block is all loads.  If so, we can queue
    // them all as live in loads.
    bool HasStore = false;
    for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
      if (isa<StoreInst>(BlockUses[i])) {
        HasStore = true;
        break;
      }
    }
    
    if (!HasStore) {
      for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
        LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
      BlockUses.clear();
      continue;
    }

    // Otherwise, we have mixed loads and stores (or just a bunch of stores).
    // Since SSAUpdater is purely for cross-block values, we need to determine
    // the order of these instructions in the block.  If the first use in the
    // block is a load, then it uses the live in value.  The last store defines
    // the live out value.  We handle this by doing a linear scan of the block.
    BasicBlock *BB = User->getParent();
    Value *StoredValue = 0;
    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
      if (LoadInst *L = dyn_cast<LoadInst>(II)) {
        // If this is a load from an unrelated pointer, ignore it.
        if (!PointerMustAliases.count(L->getOperand(0))) continue;

        // If we haven't seen a store yet, this is a live in use, otherwise
        // use the stored value.
        if (StoredValue) {
          L->replaceAllUsesWith(StoredValue);
          ReplacedLoads[L] = StoredValue;
        } else {
          LiveInLoads.push_back(L);
        }
        continue;
      }
      
      if (StoreInst *S = dyn_cast<StoreInst>(II)) {
        // If this is a store to an unrelated pointer, ignore it.
        if (!PointerMustAliases.count(S->getOperand(1))) continue;

        // Remember that this is the active value in the block.
        StoredValue = S->getOperand(0);
      }
    }
    
    // The last stored value that happened is the live-out for the block.
    assert(StoredValue && "Already checked that there is a store in block");
    SSA.AddAvailableValue(BB, StoredValue);
    BlockUses.clear();
  }
  
  // Now that all the intra-loop values are classified, set up the preheader.
  // It gets a load of the pointer we're promoting, and it is the live-out value
  // from the preheader.
  LoadInst *PreheaderLoad = new LoadInst(SomePtr,SomePtr->getName()+".promoted",
                                         Preheader->getTerminator());
  SSA.AddAvailableValue(Preheader, PreheaderLoad);

  // Now that the preheader is good to go, set up the exit blocks.  Each exit
  // block gets a store of the live-out values that feed them.  Since we've
  // already told the SSA updater about the defs in the loop and the preheader
  // definition, it is all set and we can start using it.
  SmallVector<BasicBlock*, 8> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);
  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
    BasicBlock *ExitBlock = ExitBlocks[i];
    Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
    Instruction *InsertPos = ExitBlock->getFirstNonPHI();
    new StoreInst(LiveInValue, SomePtr, InsertPos);
  }

  // Okay, now we rewrite all loads that use live-in values in the loop,
  // inserting PHI nodes as necessary.
  for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
    LoadInst *ALoad = LiveInLoads[i];
    Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
    ALoad->replaceAllUsesWith(NewVal);
    CurAST->copyValue(ALoad, NewVal);
    ReplacedLoads[ALoad] = NewVal;
  }
  
  // If the preheader load is itself a pointer, we need to tell alias analysis
  // about the new pointer we created in the preheader block and about any PHI
  // nodes that just got inserted.
  if (PreheaderLoad->getType()->isPointerTy()) {
    // Copy any value stored to or loaded from a must-alias of the pointer.
    CurAST->copyValue(SomeValue, PreheaderLoad);
    
    for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
      CurAST->copyValue(SomeValue, NewPHIs[i]);
  }
  
  // Now that everything is rewritten, delete the old instructions from the body
  // of the loop.  They should all be dead now.
  for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
    Instruction *User = LoopUses[i];
    
    // If this is a load that still has uses, then the load must have been added
    // as a live value in the SSAUpdate data structure for a block (e.g. because
    // the loaded value was stored later).  In this case, we need to recursively
    // propagate the updates until we get to the real value.
    if (!User->use_empty()) {
      Value *NewVal = ReplacedLoads[User];
      assert(NewVal && "not a replaced load?");
      
      // Propagate down to the ultimate replacee.  The intermediately loads
      // could theoretically already have been deleted, so we don't want to
      // dereference the Value*'s.
      DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
      while (RLI != ReplacedLoads.end()) {
        NewVal = RLI->second;
        RLI = ReplacedLoads.find(NewVal);
      }
      
      User->replaceAllUsesWith(NewVal);
      CurAST->copyValue(User, NewVal);
    }
    
    CurAST->deleteValue(User);
    User->eraseFromParent();
  }
  
  // fwew, we're done!
}

Example #13

Show file

File: ArgumentPromotion.cpp Project: mkurdej/llvm

/// DoPromotion - This method actually performs the promotion of the specified
/// arguments, and returns the new function.  At this point, we know that it's
/// safe to do so.
static Function *
doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
            SmallPtrSetImpl<Argument *> &ByValArgsToTransform,
            Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>>
                ReplaceCallSite) {
  // Start by computing a new prototype for the function, which is the same as
  // the old function, but has modified arguments.
  FunctionType *FTy = F->getFunctionType();
  std::vector<Type *> Params;

  using ScalarizeTable = std::set<std::pair<Type *, IndicesVector>>;

  // ScalarizedElements - If we are promoting a pointer that has elements
  // accessed out of it, keep track of which elements are accessed so that we
  // can add one argument for each.
  //
  // Arguments that are directly loaded will have a zero element value here, to
  // handle cases where there are both a direct load and GEP accesses.
  std::map<Argument *, ScalarizeTable> ScalarizedElements;

  // OriginalLoads - Keep track of a representative load instruction from the
  // original function so that we can tell the alias analysis implementation
  // what the new GEP/Load instructions we are inserting look like.
  // We need to keep the original loads for each argument and the elements
  // of the argument that are accessed.
  std::map<std::pair<Argument *, IndicesVector>, LoadInst *> OriginalLoads;

  // Attribute - Keep track of the parameter attributes for the arguments
  // that we are *not* promoting. For the ones that we do promote, the parameter
  // attributes are lost
  SmallVector<AttributeSet, 8> ArgAttrVec;
  AttributeList PAL = F->getAttributes();

  // First, determine the new argument list
  unsigned ArgNo = 0;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
       ++I, ++ArgNo) {
    if (ByValArgsToTransform.count(&*I)) {
      // Simple byval argument? Just add all the struct element types.
      Type *AgTy = cast<PointerType>(I->getType())->getElementType();
      StructType *STy = cast<StructType>(AgTy);
      Params.insert(Params.end(), STy->element_begin(), STy->element_end());
      ArgAttrVec.insert(ArgAttrVec.end(), STy->getNumElements(),
                        AttributeSet());
      ++NumByValArgsPromoted;
    } else if (!ArgsToPromote.count(&*I)) {
      // Unchanged argument
      Params.push_back(I->getType());
      ArgAttrVec.push_back(PAL.getParamAttributes(ArgNo));
    } else if (I->use_empty()) {
      // Dead argument (which are always marked as promotable)
      ++NumArgumentsDead;

      // There may be remaining metadata uses of the argument for things like
      // llvm.dbg.value. Replace them with undef.
      I->replaceAllUsesWith(UndefValue::get(I->getType()));
    } else {
      // Okay, this is being promoted. This means that the only uses are loads
      // or GEPs which are only used by loads

      // In this table, we will track which indices are loaded from the argument
      // (where direct loads are tracked as no indices).
      ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
      for (User *U : I->users()) {
        Instruction *UI = cast<Instruction>(U);
        Type *SrcTy;
        if (LoadInst *L = dyn_cast<LoadInst>(UI))
          SrcTy = L->getType();
        else
          SrcTy = cast<GetElementPtrInst>(UI)->getSourceElementType();
        IndicesVector Indices;
        Indices.reserve(UI->getNumOperands() - 1);
        // Since loads will only have a single operand, and GEPs only a single
        // non-index operand, this will record direct loads without any indices,
        // and gep+loads with the GEP indices.
        for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end();
             II != IE; ++II)
          Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
        // GEPs with a single 0 index can be merged with direct loads
        if (Indices.size() == 1 && Indices.front() == 0)
          Indices.clear();
        ArgIndices.insert(std::make_pair(SrcTy, Indices));
        LoadInst *OrigLoad;
        if (LoadInst *L = dyn_cast<LoadInst>(UI))
          OrigLoad = L;
        else
          // Take any load, we will use it only to update Alias Analysis
          OrigLoad = cast<LoadInst>(UI->user_back());
        OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad;
      }

      // Add a parameter to the function for each element passed in.
      for (const auto &ArgIndex : ArgIndices) {
        // not allowed to dereference ->begin() if size() is 0
        Params.push_back(GetElementPtrInst::getIndexedType(
            cast<PointerType>(I->getType()->getScalarType())->getElementType(),
            ArgIndex.second));
        ArgAttrVec.push_back(AttributeSet());
        assert(Params.back());
      }

      if (ArgIndices.size() == 1 && ArgIndices.begin()->second.empty())
        ++NumArgumentsPromoted;
      else
        ++NumAggregatesPromoted;
    }
  }

  Type *RetTy = FTy->getReturnType();

  // Construct the new function type using the new arguments.
  FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());

  // Create the new function body and insert it into the module.
  Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName());
  NF->copyAttributesFrom(F);

  // Patch the pointer to LLVM function in debug info descriptor.
  NF->setSubprogram(F->getSubprogram());
  F->setSubprogram(nullptr);

  DEBUG(dbgs() << "ARG PROMOTION:  Promoting to:" << *NF << "\n"
               << "From: " << *F);

  // Recompute the parameter attributes list based on the new arguments for
  // the function.
  NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttributes(),
                                       PAL.getRetAttributes(), ArgAttrVec));
  ArgAttrVec.clear();

  F->getParent()->getFunctionList().insert(F->getIterator(), NF);
  NF->takeName(F);

  // Loop over all of the callers of the function, transforming the call sites
  // to pass in the loaded pointers.
  //
  SmallVector<Value *, 16> Args;
  while (!F->use_empty()) {
    CallSite CS(F->user_back());
    assert(CS.getCalledFunction() == F);
    Instruction *Call = CS.getInstruction();
    const AttributeList &CallPAL = CS.getAttributes();

    // Loop over the operands, inserting GEP and loads in the caller as
    // appropriate.
    CallSite::arg_iterator AI = CS.arg_begin();
    ArgNo = 0;
    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
         ++I, ++AI, ++ArgNo)
      if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
        Args.push_back(*AI); // Unmodified argument
        ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
      } else if (ByValArgsToTransform.count(&*I)) {
        // Emit a GEP and load for each element of the struct.
        Type *AgTy = cast<PointerType>(I->getType())->getElementType();
        StructType *STy = cast<StructType>(AgTy);
        Value *Idxs[2] = {
            ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr};
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
          Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
          Value *Idx = GetElementPtrInst::Create(
              STy, *AI, Idxs, (*AI)->getName() + "." + Twine(i), Call);
          // TODO: Tell AA about the new values?
          Args.push_back(new LoadInst(Idx, Idx->getName() + ".val", Call));
          ArgAttrVec.push_back(AttributeSet());
        }
      } else if (!I->use_empty()) {
        // Non-dead argument: insert GEPs and loads as appropriate.
        ScalarizeTable &ArgIndices = ScalarizedElements[&*I];
        // Store the Value* version of the indices in here, but declare it now
        // for reuse.
        std::vector<Value *> Ops;
        for (const auto &ArgIndex : ArgIndices) {
          Value *V = *AI;
          LoadInst *OrigLoad =
              OriginalLoads[std::make_pair(&*I, ArgIndex.second)];
          if (!ArgIndex.second.empty()) {
            Ops.reserve(ArgIndex.second.size());
            Type *ElTy = V->getType();
            for (auto II : ArgIndex.second) {
              // Use i32 to index structs, and i64 for others (pointers/arrays).
              // This satisfies GEP constraints.
              Type *IdxTy =
                  (ElTy->isStructTy() ? Type::getInt32Ty(F->getContext())
                                      : Type::getInt64Ty(F->getContext()));
              Ops.push_back(ConstantInt::get(IdxTy, II));
              // Keep track of the type we're currently indexing.
              if (auto *ElPTy = dyn_cast<PointerType>(ElTy))
                ElTy = ElPTy->getElementType();
              else
                ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II);
            }
            // And create a GEP to extract those indices.
            V = GetElementPtrInst::Create(ArgIndex.first, V, Ops,
                                          V->getName() + ".idx", Call);
            Ops.clear();
          }
          // Since we're replacing a load make sure we take the alignment
          // of the previous load.
          LoadInst *newLoad = new LoadInst(V, V->getName() + ".val", Call);
          newLoad->setAlignment(OrigLoad->getAlignment());
          // Transfer the AA info too.
          AAMDNodes AAInfo;
          OrigLoad->getAAMetadata(AAInfo);
          newLoad->setAAMetadata(AAInfo);

          Args.push_back(newLoad);
          ArgAttrVec.push_back(AttributeSet());
        }
      }

    // Push any varargs arguments on the list.
    for (; AI != CS.arg_end(); ++AI, ++ArgNo) {
      Args.push_back(*AI);
      ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
    }

    SmallVector<OperandBundleDef, 1> OpBundles;
    CS.getOperandBundlesAsDefs(OpBundles);

    CallSite NewCS;
    if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
      NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                                 Args, OpBundles, "", Call);
    } else {
      auto *NewCall = CallInst::Create(NF, Args, OpBundles, "", Call);
      NewCall->setTailCallKind(cast<CallInst>(Call)->getTailCallKind());
      NewCS = NewCall;
    }
    NewCS.setCallingConv(CS.getCallingConv());
    NewCS.setAttributes(
        AttributeList::get(F->getContext(), CallPAL.getFnAttributes(),
                           CallPAL.getRetAttributes(), ArgAttrVec));
    NewCS->setDebugLoc(Call->getDebugLoc());
    uint64_t W;
    if (Call->extractProfTotalWeight(W))
      NewCS->setProfWeight(W);
    Args.clear();
    ArgAttrVec.clear();

    // Update the callgraph to know that the callsite has been transformed.
    if (ReplaceCallSite)
      (*ReplaceCallSite)(CS, NewCS);

    if (!Call->use_empty()) {
      Call->replaceAllUsesWith(NewCS.getInstruction());
      NewCS->takeName(Call);
    }

    // Finally, remove the old call from the program, reducing the use-count of
    // F.
    Call->eraseFromParent();
  }

  const DataLayout &DL = F->getParent()->getDataLayout();

  // Since we have now created the new function, splice the body of the old
  // function right into the new function, leaving the old rotting hulk of the
  // function empty.
  NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList());

  // Loop over the argument list, transferring uses of the old arguments over to
  // the new arguments, also transferring over the names as well.
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(),
                              I2 = NF->arg_begin();
       I != E; ++I) {
    if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
      // If this is an unmodified argument, move the name and users over to the
      // new version.
      I->replaceAllUsesWith(&*I2);
      I2->takeName(&*I);
      ++I2;
      continue;
    }

    if (ByValArgsToTransform.count(&*I)) {
      // In the callee, we create an alloca, and store each of the new incoming
      // arguments into the alloca.
      Instruction *InsertPt = &NF->begin()->front();

      // Just add all the struct element types.
      Type *AgTy = cast<PointerType>(I->getType())->getElementType();
      Value *TheAlloca = new AllocaInst(AgTy, DL.getAllocaAddrSpace(), nullptr,
                                        I->getParamAlignment(), "", InsertPt);
      StructType *STy = cast<StructType>(AgTy);
      Value *Idxs[2] = {ConstantInt::get(Type::getInt32Ty(F->getContext()), 0),
                        nullptr};

      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
        Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
        Value *Idx = GetElementPtrInst::Create(
            AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i),
            InsertPt);
        I2->setName(I->getName() + "." + Twine(i));
        new StoreInst(&*I2++, Idx, InsertPt);
      }

      // Anything that used the arg should now use the alloca.
      I->replaceAllUsesWith(TheAlloca);
      TheAlloca->takeName(&*I);

      // If the alloca is used in a call, we must clear the tail flag since
      // the callee now uses an alloca from the caller.
      for (User *U : TheAlloca->users()) {
        CallInst *Call = dyn_cast<CallInst>(U);
        if (!Call)
          continue;
        Call->setTailCall(false);
      }
      continue;
    }

    if (I->use_empty())
      continue;

    // Otherwise, if we promoted this argument, then all users are load
    // instructions (or GEPs with only load users), and all loads should be
    // using the new argument that we added.
    ScalarizeTable &ArgIndices = ScalarizedElements[&*I];

    while (!I->use_empty()) {
      if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) {
        assert(ArgIndices.begin()->second.empty() &&
               "Load element should sort to front!");
        I2->setName(I->getName() + ".val");
        LI->replaceAllUsesWith(&*I2);
        LI->eraseFromParent();
        DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName()
                     << "' in function '" << F->getName() << "'\n");
      } else {
        GetElementPtrInst *GEP = cast<GetElementPtrInst>(I->user_back());
        IndicesVector Operands;
        Operands.reserve(GEP->getNumIndices());
        for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
             II != IE; ++II)
          Operands.push_back(cast<ConstantInt>(*II)->getSExtValue());

        // GEPs with a single 0 index can be merged with direct loads
        if (Operands.size() == 1 && Operands.front() == 0)
          Operands.clear();

        Function::arg_iterator TheArg = I2;
        for (ScalarizeTable::iterator It = ArgIndices.begin();
             It->second != Operands; ++It, ++TheArg) {
          assert(It != ArgIndices.end() && "GEP not handled??");
        }

        std::string NewName = I->getName();
        for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
          NewName += "." + utostr(Operands[i]);
        }
        NewName += ".val";
        TheArg->setName(NewName);

        DEBUG(dbgs() << "*** Promoted agg argument '" << TheArg->getName()
                     << "' of function '" << NF->getName() << "'\n");

        // All of the uses must be load instructions.  Replace them all with
        // the argument specified by ArgNo.
        while (!GEP->use_empty()) {
          LoadInst *L = cast<LoadInst>(GEP->user_back());
          L->replaceAllUsesWith(&*TheArg);
          L->eraseFromParent();
        }
        GEP->eraseFromParent();
      }
    }

    // Increment I2 past all of the arguments added for this promoted pointer.
    std::advance(I2, ArgIndices.size());
  }

  return NF;
}

Example #14

Show file

File: futamurize.cpp Project: peufeu/llvm-futamura-experiments

Function * futamurize( const Function * orig_func, DenseMap<const Value*, Value*> &argmap, std::set<const unsigned char *> &constant_addresses_set )
{
	LLVMContext &context = getGlobalContext();
	
	
	// Make a copy of the function, removing constant arguments
	Function * specialized_func = CloneFunction( orig_func, argmap );
	specialized_func->setName( orig_func->getNameStr() + "_1" );
	
	// add it to our module
	LLVM_Module->getFunctionList().push_back( specialized_func );
	
	printf("\nspecialized_func = %p <%s>\n", specialized_func, specialized_func->getName().data());
	//~ specialized_func->dump();

	// Optimize it
	FunctionPassManager PM( LLVM_Module );
	createStandardFunctionPasses( &PM, 3 );
	
	PM.add(createScalarReplAggregatesPass());  // Break up aggregate allocas
	PM.add(createInstructionCombiningPass());  // Cleanup for scalarrepl.
	PM.add(createJumpThreadingPass());         // Thread jumps.
	PM.add(createCFGSimplificationPass());     // Merge & remove BBs
	PM.add(createInstructionCombiningPass());  // Combine silly seq's
	PM.add(createTailCallEliminationPass());   // Eliminate tail calls
	PM.add(createCFGSimplificationPass());     // Merge & remove BBs
	PM.add(createReassociatePass());           // Reassociate expressions
	PM.add(createLoopRotatePass());            // Rotate Loop
	PM.add(createLICMPass());                  // Hoist loop invariants
	PM.add(createLoopUnswitchPass( false ));
	PM.add(createInstructionCombiningPass());
	PM.add(createIndVarSimplifyPass());        // Canonicalize indvars
	PM.add(createLoopDeletionPass());          // Delete dead loops
	PM.add(createLoopUnroll2Pass());            // Unroll small loops
	PM.add(createInstructionCombiningPass());  // Clean up after the unroller
	PM.add(createGVNPass());                   // Remove redundancies
	PM.add(createMemCpyOptPass());             // Remove memcpy / form memset
	PM.add(createSCCPPass());                  // Constant prop with SCCP
	PM.add(createPromoteMemoryToRegisterPass()); 
	PM.add(createConstantPropagationPass());            
	PM.add(createDeadStoreEliminationPass());            
	PM.add(createAggressiveDCEPass());            
	PM.add(new MemoryDependenceAnalysis());            
	//~ PM.add(createAAEvalPass());              
	
	const PassInfo * pinfo = Pass::lookupPassInfo( "print-alias-sets" );
	if( !pinfo ) { printf( "print-alias-sets not found\n" ); exit(-1); }
	PM.add( pinfo->createPass() );
	
	FunctionPassManager PM_Inline( LLVM_Module );
	PM_Inline.add(createSingleFunctionInliningPass());            
	
	bool Changed = false;
	int iterations = 2;
	int inline_iterations = 6;
	
	do
	{
		Changed = false;
		
		// first do some optimizations
		PM.doInitialization();
		PM.run( *specialized_func );
		PM.doFinalization();
		
		// Load from Constant Memory detection
		const TargetData *TD = LLVM_EE->getTargetData();
		
		for (inst_iterator I = inst_begin(specialized_func), E = inst_end(specialized_func); I != E; ++I) 
		{
			Instruction * inst = (Instruction *) &*I;

			// get all Load instructions
			LoadInst * load = dyn_cast<LoadInst>( inst );
			if( !load ) continue;
			if( load->isVolatile() ) continue;

			if (load->use_empty()) continue;        // Don't muck with dead instructions...

			// get the address loaded by load instruction
			Value *ptr_value = load->getPointerOperand();
			
			// we're only interested in constant addresses
			ConstantExpr * ptr_constant_expr =  dyn_cast<ConstantExpr>( ptr_value );
			if( !ptr_constant_expr ) continue;			
			ptr_constant_expr->dump();
			
			// compute real address of constant pointer expression
			Constant * ptr_constant = ConstantFoldConstantExpression( ptr_constant_expr, TD );
			if( !ptr_constant ) continue;
			ptr_constant->dump();
			
			// convert to int constant
			ConstantInt *int_constant =  dyn_cast<ConstantInt>( ConstantExpr::getPtrToInt( ptr_constant, Type::getInt64Ty( context )));
			if( !int_constant ) continue;
			int_constant->dump();
			
			// get data size
			int data_length = TD->getTypeAllocSize( load->getType() );
			ptr_value->getType()->dump();
			
			// get real address (at last !)
			const unsigned char * c_ptr = (const unsigned char *) int_constant->getLimitedValue();
			
			printf( "%ld %d %d\n", c_ptr, constant_addresses_set.count( c_ptr ), data_length );
			
			// check what's in this address	
			int isconst = 1;
			for( int offset=0; offset<data_length; offset++ )
				isconst &= constant_addresses_set.count( c_ptr + offset );
			
			if( !isconst ) continue;
			printf( "It is constant.\n" );
			
			// make a LLVM const with the data
			Constant *new_constant = NULL;
			switch( data_length )
			{
				case 1:	new_constant = ConstantInt::get( Type::getInt8Ty( context ),  *(uint8_t*)c_ptr, false /* signed */ );	break;
				case 2:	new_constant = ConstantInt::get( Type::getInt16Ty( context ), *(uint16_t*)c_ptr, false /* signed */ );	break;
				case 4:	new_constant = ConstantInt::get( Type::getInt32Ty( context ), *(uint32_t*)c_ptr, false /* signed */ );	break;
				case 8:	new_constant = ConstantInt::get( Type::getInt64Ty( context ), *(uint64_t*)c_ptr, false /* signed */ );	break;
				default:
				{
					StringRef const_data ( (const char *) c_ptr, data_length );
					new_constant = ConstantArray::get( context, const_data, false /* dont add terminating null */ );
				}
			}
			
			if( !new_constant ) continue;
			
			new_constant->dump();
							
			//~ // get the type that is loaded
			const Type *Ty = load->getType();
			
			// do we need a cast ?
			if( load->getType() != new_constant->getType() )
			{
				new_constant = ConstantExpr::getBitCast( new_constant, Ty );
				new_constant->dump();
			}
			
			// zap the load and replace with constant address
			load->replaceAllUsesWith( new_constant );
			printf( "\nREPLACED :...\n" );
			load->dump();
			new_constant->dump();
			
			Changed = true;
		}	
		
		if( Changed )
			continue;	// re-optimize and do another pass of constant load elimination
		
		// if we can't do anything else, do an inlining pass
		if( inline_iterations > 0 )
		{
			inline_iterations --;
			
			PM_Inline.doInitialization();
			Changed |= PM_Inline.run( *specialized_func );
			PM_Inline.doFinalization();

			//~ for( int i=0; i<3; i++ )
			{
				PM.doInitialization();
				Changed |= PM.run( *specialized_func );
				PM.doFinalization();
			}
		}
		
		if( iterations>0 && !Changed ) 
			iterations--;
	} while( Changed || iterations>0 );
	
	return specialized_func;
}