Esempio n. 1
0
/// isSafeToPromoteArgument - As you might guess from the name of this method,
/// it checks to see if it is both safe and useful to promote the argument.
/// This method limits promotion of aggregates to only promote up to three
/// elements of the aggregate in order to avoid exploding the number of
/// arguments passed in.
bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
  typedef std::set<IndicesVector> GEPIndicesSet;

  // Quick exit for unused arguments
  if (Arg->use_empty())
    return true;

  // We can only promote this argument if all of the uses are loads, or are GEP
  // instructions (with constant indices) that are subsequently loaded.
  //
  // Promoting the argument causes it to be loaded in the caller
  // unconditionally. This is only safe if we can prove that either the load
  // would have happened in the callee anyway (ie, there is a load in the entry
  // block) or the pointer passed in at every call site is guaranteed to be
  // valid.
  // In the former case, invalid loads can happen, but would have happened
  // anyway, in the latter case, invalid loads won't happen. This prevents us
  // from introducing an invalid load that wouldn't have happened in the
  // original code.
  //
  // This set will contain all sets of indices that are loaded in the entry
  // block, and thus are safe to unconditionally load in the caller.
  GEPIndicesSet SafeToUnconditionallyLoad;

  // This set contains all the sets of indices that we are planning to promote.
  // This makes it possible to limit the number of arguments added.
  GEPIndicesSet ToPromote;

  // If the pointer is always valid, any load with first index 0 is valid.
  if (isByVal || AllCalleesPassInValidPointerForArgument(Arg))
    SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));

  // First, iterate the entry block and mark loads of (geps of) arguments as
  // safe.
  BasicBlock *EntryBlock = Arg->getParent()->begin();
  // Declare this here so we can reuse it
  IndicesVector Indices;
  for (BasicBlock::iterator I = EntryBlock->begin(), E = EntryBlock->end();
       I != E; ++I)
    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      Value *V = LI->getPointerOperand();
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
        V = GEP->getPointerOperand();
        if (V == Arg) {
          // This load actually loads (part of) Arg? Check the indices then.
          Indices.reserve(GEP->getNumIndices());
          for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
               II != IE; ++II)
            if (ConstantInt *CI = dyn_cast<ConstantInt>(*II))
              Indices.push_back(CI->getSExtValue());
            else
              // We found a non-constant GEP index for this argument? Bail out
              // right away, can't promote this argument at all.
              return false;

          // Indices checked out, mark them as safe
          MarkIndicesSafe(Indices, SafeToUnconditionallyLoad);
          Indices.clear();
        }
      } else if (V == Arg) {
        // Direct loads are equivalent to a GEP with a single 0 index.
        MarkIndicesSafe(IndicesVector(1, 0), SafeToUnconditionallyLoad);
      }
    }

  // Now, iterate all uses of the argument to see if there are any uses that are
  // not (GEP+)loads, or any (GEP+)loads that are not safe to promote.
  SmallVector<LoadInst*, 16> Loads;
  IndicesVector Operands;
  for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end();
       UI != E; ++UI) {
    User *U = *UI;
    Operands.clear();
    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      if (LI->isVolatile()) return false;  // Don't hack volatile loads
      Loads.push_back(LI);
      // Direct loads are equivalent to a GEP with a zero index and then a load.
      Operands.push_back(0);
    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      if (GEP->use_empty()) {
        // Dead GEP's cause trouble later.  Just remove them if we run into
        // them.
        getAnalysis<AliasAnalysis>().deleteValue(GEP);
        GEP->eraseFromParent();
        // TODO: This runs the above loop over and over again for dead GEPs
        // Couldn't we just do increment the UI iterator earlier and erase the
        // use?
        return isSafeToPromoteArgument(Arg, isByVal);
      }

      // Ensure that all of the indices are constants.
      for (User::op_iterator i = GEP->idx_begin(), e = GEP->idx_end();
        i != e; ++i)
        if (ConstantInt *C = dyn_cast<ConstantInt>(*i))
          Operands.push_back(C->getSExtValue());
        else
          return false;  // Not a constant operand GEP!

      // Ensure that the only users of the GEP are load instructions.
      for (Value::use_iterator UI = GEP->use_begin(), E = GEP->use_end();
           UI != E; ++UI)
        if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
          if (LI->isVolatile()) return false;  // Don't hack volatile loads
          Loads.push_back(LI);
        } else {
          // Other uses than load?
          return false;
        }
    } else {
      return false;  // Not a load or a GEP.
    }

    // Now, see if it is safe to promote this load / loads of this GEP. Loading
    // is safe if Operands, or a prefix of Operands, is marked as safe.
    if (!PrefixIn(Operands, SafeToUnconditionallyLoad))
      return false;

    // See if we are already promoting a load with these indices. If not, check
    // to make sure that we aren't promoting too many elements.  If so, nothing
    // to do.
    if (ToPromote.find(Operands) == ToPromote.end()) {
      if (maxElements > 0 && ToPromote.size() == maxElements) {
        DEBUG(dbgs() << "argpromotion not promoting argument '"
              << Arg->getName() << "' because it would require adding more "
              << "than " << maxElements << " arguments to the function.\n");
        // We limit aggregate promotion to only promoting up to a fixed number
        // of elements of the aggregate.
        return false;
      }
      ToPromote.insert(Operands);
    }
  }

  if (Loads.empty()) return true;  // No users, this is a dead argument.

  // Okay, now we know that the argument is only used by load instructions and
  // it is safe to unconditionally perform all of them. Use alias analysis to
  // check to see if the pointer is guaranteed to not be modified from entry of
  // the function to each of the load instructions.

  // Because there could be several/many load instructions, remember which
  // blocks we know to be transparent to the load.
  SmallPtrSet<BasicBlock*, 16> TranspBlocks;

  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  TargetData *TD = getAnalysisIfAvailable<TargetData>();
  if (!TD) return false; // Without TargetData, assume the worst.

  for (unsigned i = 0, e = Loads.size(); i != e; ++i) {
    // Check to see if the load is invalidated from the start of the block to
    // the load itself.
    LoadInst *Load = Loads[i];
    BasicBlock *BB = Load->getParent();

    const PointerType *LoadTy =
      cast<PointerType>(Load->getPointerOperand()->getType());
    unsigned LoadSize =(unsigned)TD->getTypeStoreSize(LoadTy->getElementType());

    if (AA.canInstructionRangeModify(BB->front(), *Load, Arg, LoadSize))
      return false;  // Pointer is invalidated!

    // Now check every path from the entry block to the load for transparency.
    // To do this, we perform a depth first search on the inverse CFG from the
    // loading block.
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      BasicBlock *P = *PI;
      for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> >
             I = idf_ext_begin(P, TranspBlocks),
             E = idf_ext_end(P, TranspBlocks); I != E; ++I)
        if (AA.canBasicBlockModify(**I, Arg, LoadSize))
          return false;
    }
  }

  // If the path from the entry of the function to each load is free of
  // instructions that potentially invalidate the load, we can make the
  // transformation!
  return true;
}
Esempio n. 2
0
/// processStore - When GVN is scanning forward over instructions, we look for
/// some other patterns to fold away.  In particular, this looks for stores to
/// neighboring locations of memory.  If it sees enough consequtive ones
/// (currently 4) it attempts to merge them together into a memcpy/memset.
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
  if (SI->isVolatile()) return false;
  
  TargetData *TD = getAnalysisIfAvailable<TargetData>();
  if (!TD) return false;

  // Detect cases where we're performing call slot forwarding, but
  // happen to be using a load-store pair to implement it, rather than
  // a memcpy.
  if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
    if (!LI->isVolatile() && LI->hasOneUse()) {
      MemDepResult dep = MD->getDependency(LI);
      CallInst *C = 0;
      if (dep.isClobber() && !isa<MemCpyInst>(dep.getInst()))
        C = dyn_cast<CallInst>(dep.getInst());
      
      if (C) {
        bool changed = performCallSlotOptzn(LI,
                        SI->getPointerOperand()->stripPointerCasts(), 
                        LI->getPointerOperand()->stripPointerCasts(),
                        TD->getTypeStoreSize(SI->getOperand(0)->getType()), C);
        if (changed) {
          MD->removeInstruction(SI);
          SI->eraseFromParent();
          LI->eraseFromParent();
          ++NumMemCpyInstr;
          return true;
        }
      }
    }
  }
  
  LLVMContext &Context = SI->getContext();

  // There are two cases that are interesting for this code to handle: memcpy
  // and memset.  Right now we only handle memset.
  
  // Ensure that the value being stored is something that can be memset'able a
  // byte at a time like "0" or "-1" or any width, as well as things like
  // 0xA0A0A0A0 and 0.0.
  Value *ByteVal = isBytewiseValue(SI->getOperand(0));
  if (!ByteVal)
    return false;

  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  Module *M = SI->getParent()->getParent()->getParent();

  // Okay, so we now have a single store that can be splatable.  Scan to find
  // all subsequent stores of the same value to offset from the same pointer.
  // Join these together into ranges, so we can decide whether contiguous blocks
  // are stored.
  MemsetRanges Ranges(*TD);
  
  Value *StartPtr = SI->getPointerOperand();
  
  BasicBlock::iterator BI = SI;
  for (++BI; !isa<TerminatorInst>(BI); ++BI) {
    if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) { 
      // If the call is readnone, ignore it, otherwise bail out.  We don't even
      // allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (AA.getModRefBehavior(CallSite(BI)) ==
            AliasAnalysis::DoesNotAccessMemory)
        continue;
      
      // TODO: If this is a memset, try to join it in.
      
      break;
    } else if (isa<VAArgInst>(BI) || isa<LoadInst>(BI))
      break;

    // If this is a non-store instruction it is fine, ignore it.
    StoreInst *NextStore = dyn_cast<StoreInst>(BI);
    if (NextStore == 0) continue;
    
    // If this is a store, see if we can merge it in.
    if (NextStore->isVolatile()) break;
    
    // Check to see if this stored value is of the same byte-splattable value.
    if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
      break;

    // Check to see if this store is to a constant offset from the start ptr.
    int64_t Offset;
    if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
      break;

    Ranges.addStore(Offset, NextStore);
  }

  // If we have no ranges, then we just had a single store with nothing that
  // could be merged in.  This is a very common case of course.
  if (Ranges.empty())
    return false;
  
  // If we had at least one store that could be merged in, add the starting
  // store as well.  We try to avoid this unless there is at least something
  // interesting as a small compile-time optimization.
  Ranges.addStore(0, SI);
  
  
  // Now that we have full information about ranges, loop over the ranges and
  // emit memset's for anything big enough to be worthwhile.
  bool MadeChange = false;
  for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
       I != E; ++I) {
    const MemsetRange &Range = *I;

    if (Range.TheStores.size() == 1) continue;
    
    // If it is profitable to lower this range to memset, do so now.
    if (!Range.isProfitableToUseMemset(*TD))
      continue;
    
    // Otherwise, we do want to transform this!  Create a new memset.  We put
    // the memset right before the first instruction that isn't part of this
    // memset block.  This ensure that the memset is dominated by any addressing
    // instruction needed by the start of the block.
    BasicBlock::iterator InsertPt = BI;

    // Get the starting pointer of the block.
    StartPtr = Range.StartPtr;

    // Determine alignment
    unsigned Alignment = Range.Alignment;
    if (Alignment == 0) {
      const Type *EltType = 
         cast<PointerType>(StartPtr->getType())->getElementType();
      Alignment = TD->getABITypeAlignment(EltType);
    }

    // Cast the start ptr to be i8* as memset requires.
    const PointerType* StartPTy = cast<PointerType>(StartPtr->getType());
    const PointerType *i8Ptr = Type::getInt8PtrTy(Context,
                                                  StartPTy->getAddressSpace());
    if (StartPTy!= i8Ptr)
      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),
                                 InsertPt);

    Value *Ops[] = {
      StartPtr, ByteVal,   // Start, value
      // size
      ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start),
      // align
      ConstantInt::get(Type::getInt32Ty(Context), Alignment),
      // volatile
      ConstantInt::getFalse(Context),
    };
    const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };

    Function *MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);

    Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt);
    DEBUG(dbgs() << "Replace stores:\n";
          for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
            dbgs() << *Range.TheStores[i] << '\n';
          dbgs() << "With: " << *C << '\n'); C=C;
  
    // Don't invalidate the iterator
    BBI = BI;
  
    // Zap all the stores.
    for (SmallVector<StoreInst*, 16>::const_iterator
         SI = Range.TheStores.begin(),
         SE = Range.TheStores.end(); SI != SE; ++SI)
      (*SI)->eraseFromParent();
    ++NumMemSetInfer;
    MadeChange = true;
  }
  
  return MadeChange;
}