/// HandleFree - Handle frees of entire structures whose dependency is a store
/// to a field of that structure.
bool DSE::HandleFree(CallInst *F) {
  bool MadeChange = false;

  AliasAnalysis::Location Loc = AliasAnalysis::Location(F->getOperand(0));
  SmallVector<BasicBlock *, 16> Blocks;
  Blocks.push_back(F->getParent());

  while (!Blocks.empty()) {
    BasicBlock *BB = Blocks.pop_back_val();
    Instruction *InstPt = BB->getTerminator();
    if (BB == F->getParent()) InstPt = F;

    MemDepResult Dep = MD->getPointerDependencyFrom(Loc, false, InstPt, BB);
    while (Dep.isDef() || Dep.isClobber()) {
      Instruction *Dependency = Dep.getInst();
      if (!hasMemoryWrite(Dependency, TLI) || !isRemovable(Dependency))
        break;

      Value *DepPointer =
        GetUnderlyingObject(getStoredPointerOperand(Dependency));

      // Check for aliasing.
      if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
        break;

      Instruction *Next = llvm::next(BasicBlock::iterator(Dependency));

      // DCE instructions only used to calculate that store
      DeleteDeadInstruction(Dependency, *MD, TLI);
      ++NumFastStores;
      MadeChange = true;

      // Inst's old Dependency is now deleted. Compute the next dependency,
      // which may also be dead, as in
      //    s[0] = 0;
      //    s[1] = 0; // This has just been deleted.
      //    free(s);
      Dep = MD->getPointerDependencyFrom(Loc, false, Next, BB);
    }

    if (Dep.isNonLocal())
      FindUnconditionalPreds(Blocks, BB, DT);
  }

  return MadeChange;
}
/// removeInstruction - Remove an instruction from the dependence analysis,
/// updating the dependence of instructions that previously depended on it.
/// This method attempts to keep the cache coherent using the reverse map.
void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
  // Walk through the Non-local dependencies, removing this one as the value
  // for any cached queries.
  NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst);
  if (NLDI != NonLocalDeps.end()) {
    NonLocalDepInfo &BlockMap = NLDI->second.first;
    for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end();
         DI != DE; ++DI)
      if (Instruction *Inst = DI->second.getInst())
        RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
    NonLocalDeps.erase(NLDI);
  }

  // If we have a cached local dependence query for this instruction, remove it.
  //
  LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
  if (LocalDepEntry != LocalDeps.end()) {
    // Remove us from DepInst's reverse set now that the local dep info is gone.
    if (Instruction *Inst = LocalDepEntry->second.getInst())
      RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);

    // Remove this local dependency info.
    LocalDeps.erase(LocalDepEntry);
  }
  
  // If we have any cached pointer dependencies on this instruction, remove
  // them.  If the instruction has non-pointer type, then it can't be a pointer
  // base.
  
  // Remove it from both the load info and the store info.  The instruction
  // can't be in either of these maps if it is non-pointer.
  if (isa<PointerType>(RemInst->getType())) {
    RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
    RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
  }
  
  // Loop over all of the things that depend on the instruction we're removing.
  // 
  SmallVector<std::pair<Instruction*, Instruction*>, 8> ReverseDepsToAdd;

  // If we find RemInst as a clobber or Def in any of the maps for other values,
  // we need to replace its entry with a dirty version of the instruction after
  // it.  If RemInst is a terminator, we use a null dirty value.
  //
  // Using a dirty version of the instruction after RemInst saves having to scan
  // the entire block to get to this point.
  MemDepResult NewDirtyVal;
  if (!RemInst->isTerminator())
    NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst));
  
  ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
  if (ReverseDepIt != ReverseLocalDeps.end()) {
    SmallPtrSet<Instruction*, 4> &ReverseDeps = ReverseDepIt->second;
    // RemInst can't be the terminator if it has local stuff depending on it.
    assert(!ReverseDeps.empty() && !isa<TerminatorInst>(RemInst) &&
           "Nothing can locally depend on a terminator");
    
    for (SmallPtrSet<Instruction*, 4>::iterator I = ReverseDeps.begin(),
         E = ReverseDeps.end(); I != E; ++I) {
      Instruction *InstDependingOnRemInst = *I;
      assert(InstDependingOnRemInst != RemInst &&
             "Already removed our local dep info");
                        
      LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
      
      // Make sure to remember that new things depend on NewDepInst.
      assert(NewDirtyVal.getInst() && "There is no way something else can have "
             "a local dep on this if it is a terminator!");
      ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(), 
                                                InstDependingOnRemInst));
    }
    
    ReverseLocalDeps.erase(ReverseDepIt);

    // Add new reverse deps after scanning the set, to avoid invalidating the
    // 'ReverseDeps' reference.
    while (!ReverseDepsToAdd.empty()) {
      ReverseLocalDeps[ReverseDepsToAdd.back().first]
        .insert(ReverseDepsToAdd.back().second);
      ReverseDepsToAdd.pop_back();
    }
  }
  
  ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
  if (ReverseDepIt != ReverseNonLocalDeps.end()) {
    SmallPtrSet<Instruction*, 4> &Set = ReverseDepIt->second;
    for (SmallPtrSet<Instruction*, 4>::iterator I = Set.begin(), E = Set.end();
         I != E; ++I) {
      assert(*I != RemInst && "Already removed NonLocalDep info for RemInst");
      
      PerInstNLInfo &INLD = NonLocalDeps[*I];
      // The information is now dirty!
      INLD.second = true;
      
      for (NonLocalDepInfo::iterator DI = INLD.first.begin(), 
           DE = INLD.first.end(); DI != DE; ++DI) {
        if (DI->second.getInst() != RemInst) continue;
        
        // Convert to a dirty entry for the subsequent instruction.
        DI->second = NewDirtyVal;
        
        if (Instruction *NextI = NewDirtyVal.getInst())
          ReverseDepsToAdd.push_back(std::make_pair(NextI, *I));
      }
    }

    ReverseNonLocalDeps.erase(ReverseDepIt);

    // Add new reverse deps after scanning the set, to avoid invalidating 'Set'
    while (!ReverseDepsToAdd.empty()) {
      ReverseNonLocalDeps[ReverseDepsToAdd.back().first]
        .insert(ReverseDepsToAdd.back().second);
      ReverseDepsToAdd.pop_back();
    }
  }
  
  // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
  // value in the NonLocalPointerDeps info.
  ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
    ReverseNonLocalPtrDeps.find(RemInst);
  if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
    SmallPtrSet<void*, 4> &Set = ReversePtrDepIt->second;
    SmallVector<std::pair<Instruction*, ValueIsLoadPair>,8> ReversePtrDepsToAdd;
    
    for (SmallPtrSet<void*, 4>::iterator I = Set.begin(), E = Set.end();
         I != E; ++I) {
      ValueIsLoadPair P;
      P.setFromOpaqueValue(*I);
      assert(P.getPointer() != RemInst &&
             "Already removed NonLocalPointerDeps info for RemInst");
      
      NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].second;
      
      // The cache is not valid for any specific block anymore.
      NonLocalPointerDeps[P].first = BBSkipFirstBlockPair();
      
      // Update any entries for RemInst to use the instruction after it.
      for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();
           DI != DE; ++DI) {
        if (DI->second.getInst() != RemInst) continue;
        
        // Convert to a dirty entry for the subsequent instruction.
        DI->second = NewDirtyVal;
        
        if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
          ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
      }
      
      // Re-sort the NonLocalDepInfo.  Changing the dirty entry to its
      // subsequent value may invalidate the sortedness.
      std::sort(NLPDI.begin(), NLPDI.end());
    }
    
    ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
    
    while (!ReversePtrDepsToAdd.empty()) {
      ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first]
        .insert(ReversePtrDepsToAdd.back().second.getOpaqueValue());
      ReversePtrDepsToAdd.pop_back();
    }
  }
  
  
  assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?");
  AA->deleteValue(RemInst);
  DEBUG(verifyRemoved(RemInst));
}
/// GetNonLocalInfoForBlock - Compute the memdep value for BB with
/// Pointer/PointeeSize using either cached information in Cache or by doing a
/// lookup (which may use dirty cache info if available).  If we do a lookup,
/// add the result to the cache.
MemDepResult MemoryDependenceAnalysis::
GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
                        bool isLoad, BasicBlock *BB,
                        NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
  
  // Do a binary search to see if we already have an entry for this block in
  // the cache set.  If so, find it.
  NonLocalDepInfo::iterator Entry =
    std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries,
                     std::make_pair(BB, MemDepResult()));
  if (Entry != Cache->begin() && prior(Entry)->first == BB)
    --Entry;
  
  MemDepResult *ExistingResult = 0;
  if (Entry != Cache->begin()+NumSortedEntries && Entry->first == BB)
    ExistingResult = &Entry->second;
  
  // If we have a cached entry, and it is non-dirty, use it as the value for
  // this dependency.
  if (ExistingResult && !ExistingResult->isDirty()) {
    ++NumCacheNonLocalPtr;
    return *ExistingResult;
  }    
  
  // Otherwise, we have to scan for the value.  If we have a dirty cache
  // entry, start scanning from its position, otherwise we scan from the end
  // of the block.
  BasicBlock::iterator ScanPos = BB->end();
  if (ExistingResult && ExistingResult->getInst()) {
    assert(ExistingResult->getInst()->getParent() == BB &&
           "Instruction invalidated?");
    ++NumCacheDirtyNonLocalPtr;
    ScanPos = ExistingResult->getInst();
    
    // Eliminating the dirty entry from 'Cache', so update the reverse info.
    ValueIsLoadPair CacheKey(Pointer, isLoad);
    RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos,
                         CacheKey.getOpaqueValue());
  } else {
    ++NumUncacheNonLocalPtr;
  }
  
  // Scan the block for the dependency.
  MemDepResult Dep = getPointerDependencyFrom(Pointer, PointeeSize, isLoad, 
                                              ScanPos, BB);
  
  // If we had a dirty entry for the block, update it.  Otherwise, just add
  // a new entry.
  if (ExistingResult)
    *ExistingResult = Dep;
  else
    Cache->push_back(std::make_pair(BB, Dep));
  
  // If the block has a dependency (i.e. it isn't completely transparent to
  // the value), remember the reverse association because we just added it
  // to Cache!
  if (Dep.isNonLocal())
    return Dep;
  
  // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
  // update MemDep when we remove instructions.
  Instruction *Inst = Dep.getInst();
  assert(Inst && "Didn't depend on anything?");
  ValueIsLoadPair CacheKey(Pointer, isLoad);
  ReverseNonLocalPtrDeps[Inst].insert(CacheKey.getOpaqueValue());
  return Dep;
}
/// getNonLocalCallDependency - Perform a full dependency query for the
/// specified call, returning the set of blocks that the value is
/// potentially live across.  The returned set of results will include a
/// "NonLocal" result for all blocks where the value is live across.
///
/// This method assumes the instruction returns a "NonLocal" dependency
/// within its own block.
///
/// This returns a reference to an internal data structure that may be
/// invalidated on the next non-local query or when an instruction is
/// removed.  Clients must copy this data if they want it around longer than
/// that.
const MemoryDependenceAnalysis::NonLocalDepInfo &
MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
  assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
 "getNonLocalCallDependency should only be used on calls with non-local deps!");
  PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
  NonLocalDepInfo &Cache = CacheP.first;

  /// DirtyBlocks - This is the set of blocks that need to be recomputed.  In
  /// the cached case, this can happen due to instructions being deleted etc. In
  /// the uncached case, this starts out as the set of predecessors we care
  /// about.
  SmallVector<BasicBlock*, 32> DirtyBlocks;
  
  if (!Cache.empty()) {
    // Okay, we have a cache entry.  If we know it is not dirty, just return it
    // with no computation.
    if (!CacheP.second) {
      NumCacheNonLocal++;
      return Cache;
    }
    
    // If we already have a partially computed set of results, scan them to
    // determine what is dirty, seeding our initial DirtyBlocks worklist.
    for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end();
       I != E; ++I)
      if (I->second.isDirty())
        DirtyBlocks.push_back(I->first);
    
    // Sort the cache so that we can do fast binary search lookups below.
    std::sort(Cache.begin(), Cache.end());
    
    ++NumCacheDirtyNonLocal;
    //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
    //     << Cache.size() << " cached: " << *QueryInst;
  } else {
    // Seed DirtyBlocks with each of the preds of QueryInst's block.
    BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
    for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI)
      DirtyBlocks.push_back(*PI);
    NumUncacheNonLocal++;
  }
  
  // isReadonlyCall - If this is a read-only call, we can be more aggressive.
  bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);

  SmallPtrSet<BasicBlock*, 64> Visited;
  
  unsigned NumSortedEntries = Cache.size();
  DEBUG(AssertSorted(Cache));
  
  // Iterate while we still have blocks to update.
  while (!DirtyBlocks.empty()) {
    BasicBlock *DirtyBB = DirtyBlocks.back();
    DirtyBlocks.pop_back();
    
    // Already processed this block?
    if (!Visited.insert(DirtyBB))
      continue;
    
    // Do a binary search to see if we already have an entry for this block in
    // the cache set.  If so, find it.
    DEBUG(AssertSorted(Cache, NumSortedEntries));
    NonLocalDepInfo::iterator Entry = 
      std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
                       std::make_pair(DirtyBB, MemDepResult()));
    if (Entry != Cache.begin() && prior(Entry)->first == DirtyBB)
      --Entry;
    
    MemDepResult *ExistingResult = 0;
    if (Entry != Cache.begin()+NumSortedEntries && 
        Entry->first == DirtyBB) {
      // If we already have an entry, and if it isn't already dirty, the block
      // is done.
      if (!Entry->second.isDirty())
        continue;
      
      // Otherwise, remember this slot so we can update the value.
      ExistingResult = &Entry->second;
    }
    
    // If the dirty entry has a pointer, start scanning from it so we don't have
    // to rescan the entire block.
    BasicBlock::iterator ScanPos = DirtyBB->end();
    if (ExistingResult) {
      if (Instruction *Inst = ExistingResult->getInst()) {
        ScanPos = Inst;
        // We're removing QueryInst's use of Inst.
        RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
                             QueryCS.getInstruction());
      }
    }
    
    // Find out if this block has a local dependency for QueryInst.
    MemDepResult Dep;
    
    if (ScanPos != DirtyBB->begin()) {
      Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB);
    } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
      // No dependence found.  If this is the entry block of the function, it is
      // a clobber, otherwise it is non-local.
      Dep = MemDepResult::getNonLocal();
    } else {
      Dep = MemDepResult::getClobber(ScanPos);
    }
    
    // If we had a dirty entry for the block, update it.  Otherwise, just add
    // a new entry.
    if (ExistingResult)
      *ExistingResult = Dep;
    else
      Cache.push_back(std::make_pair(DirtyBB, Dep));
    
    // If the block has a dependency (i.e. it isn't completely transparent to
    // the value), remember the association!
    if (!Dep.isNonLocal()) {
      // Keep the ReverseNonLocalDeps map up to date so we can efficiently
      // update this when we remove instructions.
      if (Instruction *Inst = Dep.getInst())
        ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
    } else {
    
      // If the block *is* completely transparent to the load, we need to check
      // the predecessors of this block.  Add them to our worklist.
      for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI)
        DirtyBlocks.push_back(*PI);
    }
  }
  
  return Cache;
}
Example #5
0
/// processMemCpy - perform simplification of memcpy's.  If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
/// circumstances). This allows later passes to remove the first memcpy
/// altogether.
bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
  // We can only optimize non-volatile memcpy's.
  if (M->isVolatile()) return false;

  // If the source and destination of the memcpy are the same, then zap it.
  if (M->getSource() == M->getDest()) {
    MD->removeInstruction(M);
    M->eraseFromParent();
    return false;
  }

  // If copying from a constant, try to turn the memcpy into a memset.
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
    if (GV->isConstant() && GV->hasDefinitiveInitializer())
      if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
        IRBuilder<> Builder(M);
        Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
                             M->getAlignment(), false);
        MD->removeInstruction(M);
        M->eraseFromParent();
        ++NumCpyToSet;
        return true;
      }

  MemDepResult DepInfo = MD->getDependency(M);

  // Try to turn a partially redundant memset + memcpy into
  // memcpy + smaller memset.  We don't need the memcpy size for this.
  if (DepInfo.isClobber())
    if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
      if (processMemSetMemCpyDependence(M, MDep))
        return true;

  // The optimizations after this point require the memcpy size.
  ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
  if (!CopySize) return false;

  // There are four possible optimizations we can do for memcpy:
  //   a) memcpy-memcpy xform which exposes redundance for DSE.
  //   b) call-memcpy xform for return slot optimization.
  //   c) memcpy from freshly alloca'd space or space that has just started its
  //      lifetime copies undefined data, and we can therefore eliminate the
  //      memcpy in favor of the data that was already at the destination.
  //   d) memcpy from a just-memset'd source can be turned into memset.
  if (DepInfo.isClobber()) {
    if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
      if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
                               CopySize->getZExtValue(), M->getAlignment(),
                               C)) {
        MD->removeInstruction(M);
        M->eraseFromParent();
        return true;
      }
    }
  }

  AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M);
  MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
                                                         M, M->getParent());

  if (SrcDepInfo.isClobber()) {
    if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
      return processMemCpyMemCpyDependence(M, MDep);
  } else if (SrcDepInfo.isDef()) {
    Instruction *I = SrcDepInfo.getInst();
    bool hasUndefContents = false;

    if (isa<AllocaInst>(I)) {
      hasUndefContents = true;
    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start)
        if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
          if (LTSize->getZExtValue() >= CopySize->getZExtValue())
            hasUndefContents = true;
    }

    if (hasUndefContents) {
      MD->removeInstruction(M);
      M->eraseFromParent();
      ++NumMemCpyInstr;
      return true;
    }
  }

  if (SrcDepInfo.isClobber())
    if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
      if (performMemCpyToMemSetOptzn(M, MDep)) {
        MD->removeInstruction(M);
        M->eraseFromParent();
        ++NumCpyToSet;
        return true;
      }

  return false;
}
Example #6
0
bool DSE::runOnBasicBlock(BasicBlock &BB) {
  bool MadeChange = false;

  // Do a top-down walk on the BB.
  for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
    Instruction *Inst = BBI++;

    // Handle 'free' calls specially.
    if (CallInst *F = isFreeCall(Inst, TLI)) {
      MadeChange |= HandleFree(F);
      continue;
    }

    // If we find something that writes memory, get its memory dependence.
    if (!hasMemoryWrite(Inst, TLI))
      continue;

    MemDepResult InstDep = MD->getDependency(Inst);

    // Ignore any store where we can't find a local dependence.
    // FIXME: cross-block DSE would be fun. :)
    if (!InstDep.isDef() && !InstDep.isClobber())
      continue;

    // If we're storing the same value back to a pointer that we just
    // loaded from, then the store can be removed.
    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
        if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
            SI->getOperand(0) == DepLoad && isRemovable(SI)) {
          DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n  "
                       << "LOAD: " << *DepLoad << "\n  STORE: " << *SI << '\n');

          // DeleteDeadInstruction can delete the current instruction.  Save BBI
          // in case we need it.
          WeakVH NextInst(BBI);

          DeleteDeadInstruction(SI, *MD, TLI);

          if (!NextInst)  // Next instruction deleted.
            BBI = BB.begin();
          else if (BBI != BB.begin())  // Revisit this instruction if possible.
            --BBI;
          ++NumFastStores;
          MadeChange = true;
          continue;
        }
      }
    }

    // Figure out what location is being stored to.
    AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA);

    // If we didn't get a useful location, fail.
    if (!Loc.Ptr)
      continue;

    while (InstDep.isDef() || InstDep.isClobber()) {
      // Get the memory clobbered by the instruction we depend on.  MemDep will
      // skip any instructions that 'Loc' clearly doesn't interact with.  If we
      // end up depending on a may- or must-aliased load, then we can't optimize
      // away the store and we bail out.  However, if we depend on on something
      // that overwrites the memory location we *can* potentially optimize it.
      //
      // Find out what memory location the dependent instruction stores.
      Instruction *DepWrite = InstDep.getInst();
      AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA);
      // If we didn't get a useful location, or if it isn't a size, bail out.
      if (!DepLoc.Ptr)
        break;

      // If we find a write that is a) removable (i.e., non-volatile), b) is
      // completely obliterated by the store to 'Loc', and c) which we know that
      // 'Inst' doesn't load from, then we can remove it.
      if (isRemovable(DepWrite) &&
          !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
        int64_t InstWriteOffset, DepWriteOffset;
        OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA,
                                         DepWriteOffset, InstWriteOffset);
        if (OR == OverwriteComplete) {
          DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
                << *DepWrite << "\n  KILLER: " << *Inst << '\n');

          // Delete the store and now-dead instructions that feed it.
          DeleteDeadInstruction(DepWrite, *MD, TLI);
          ++NumFastStores;
          MadeChange = true;

          // DeleteDeadInstruction can delete the current instruction in loop
          // cases, reset BBI.
          BBI = Inst;
          if (BBI != BB.begin())
            --BBI;
          break;
        } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
          // TODO: base this on the target vector size so that if the earlier
          // store was too small to get vector writes anyway then its likely
          // a good idea to shorten it
          // Power of 2 vector writes are probably always a bad idea to optimize
          // as any store/memset/memcpy is likely using vector instructions so
          // shortening it to not vector size is likely to be slower
          MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite);
          unsigned DepWriteAlign = DepIntrinsic->getAlignment();
          if (llvm::isPowerOf2_64(InstWriteOffset) ||
              ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {

            DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW END: "
                  << *DepWrite << "\n  KILLER (offset "
                  << InstWriteOffset << ", "
                  << DepLoc.Size << ")"
                  << *Inst << '\n');

            Value* DepWriteLength = DepIntrinsic->getLength();
            Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
                                                    InstWriteOffset -
                                                    DepWriteOffset);
            DepIntrinsic->setLength(TrimmedLength);
            MadeChange = true;
          }
        }
      }

      // If this is a may-aliased store that is clobbering the store value, we
      // can keep searching past it for another must-aliased pointer that stores
      // to the same location.  For example, in:
      //   store -> P
      //   store -> Q
      //   store -> P
      // we can remove the first store to P even though we don't know if P and Q
      // alias.
      if (DepWrite == &BB.front()) break;

      // Can't look past this instruction if it might read 'Loc'.
      if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref)
        break;

      InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB);
    }
  }

  // If this block ends in a return, unwind, or unreachable, all allocas are
  // dead at its end, which means stores to them are also dead.
  if (BB.getTerminator()->getNumSuccessors() == 0)
    MadeChange |= handleEndBlock(BB);

  return MadeChange;
}
Example #7
0
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
  if (SI->isVolatile()) return false;
  
  if (TD == 0) return false;

  // Detect cases where we're performing call slot forwarding, but
  // happen to be using a load-store pair to implement it, rather than
  // a memcpy.
  if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
    if (!LI->isVolatile() && LI->hasOneUse()) {
      MemDepResult ldep = MD->getDependency(LI);
      CallInst *C = 0;
      if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
        C = dyn_cast<CallInst>(ldep.getInst());

      if (C) {
        // Check that nothing touches the dest of the "copy" between
        // the call and the store.
        MemDepResult sdep = MD->getDependency(SI);
        if (!sdep.isNonLocal()) {
          bool FoundCall = false;
          for (BasicBlock::iterator I = SI, E = sdep.getInst(); I != E; --I) {
            if (&*I == C) {
              FoundCall = true;
              break;
            }
          }
          if (!FoundCall)
            C = 0;
        }
      }

      if (C) {
        bool changed = performCallSlotOptzn(LI,
                        SI->getPointerOperand()->stripPointerCasts(), 
                        LI->getPointerOperand()->stripPointerCasts(),
                        TD->getTypeStoreSize(SI->getOperand(0)->getType()), C);
        if (changed) {
          MD->removeInstruction(SI);
          SI->eraseFromParent();
          MD->removeInstruction(LI);
          LI->eraseFromParent();
          ++NumMemCpyInstr;
          return true;
        }
      }
    }
  }
  
  // There are two cases that are interesting for this code to handle: memcpy
  // and memset.  Right now we only handle memset.
  
  // Ensure that the value being stored is something that can be memset'able a
  // byte at a time like "0" or "-1" or any width, as well as things like
  // 0xA0A0A0A0 and 0.0.
  if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
    if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
                                              ByteVal)) {
      BBI = I;  // Don't invalidate iterator.
      return true;
    }
  
  return false;
}
/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
/// memory dependence of memcpy 'M' is the memcpy 'MDep'.  Try to simplify M to
/// copy from MDep's input if we can.  MSize is the size of M's copy.
///
bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep,
        uint64_t MSize) {
    // We can only transforms memcpy's where the dest of one is the source of the
    // other.
    if (M->getSource() != MDep->getDest() || MDep->isVolatile())
        return false;

    // If dep instruction is reading from our current input, then it is a noop
    // transfer and substituting the input won't change this instruction.  Just
    // ignore the input and let someone else zap MDep.  This handles cases like:
    //    memcpy(a <- a)
    //    memcpy(b <- a)
    if (M->getSource() == MDep->getSource())
        return false;

    // Second, the length of the memcpy's must be the same, or the preceding one
    // must be larger than the following one.
    ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
    ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
    if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
        return false;

    AliasAnalysis &AA = getAnalysis<AliasAnalysis>();

    // Verify that the copied-from memory doesn't change in between the two
    // transfers.  For example, in:
    //    memcpy(a <- b)
    //    *b = 42;
    //    memcpy(c <- a)
    // It would be invalid to transform the second memcpy into memcpy(c <- b).
    //
    // TODO: If the code between M and MDep is transparent to the destination "c",
    // then we could still perform the xform by moving M up to the first memcpy.
    //
    // NOTE: This is conservative, it will stop on any read from the source loc,
    // not just the defining memcpy.
    MemDepResult SourceDep =
        MD->getPointerDependencyFrom(AA.getLocationForSource(MDep),
                                     false, M, M->getParent());
    if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
        return false;

    // If the dest of the second might alias the source of the first, then the
    // source and dest might overlap.  We still want to eliminate the intermediate
    // value, but we have to generate a memmove instead of memcpy.
    bool UseMemMove = false;
    if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep)))
        UseMemMove = true;

    // If all checks passed, then we can transform M.

    // Make sure to use the lesser of the alignment of the source and the dest
    // since we're changing where we're reading from, but don't want to increase
    // the alignment past what can be read from or written to.
    // TODO: Is this worth it if we're creating a less aligned memcpy? For
    // example we could be moving from movaps -> movq on x86.
    unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());

    IRBuilder<> Builder(M);
    if (UseMemMove)
        Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
                              Align, M->isVolatile());
    else
        Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(),
                             Align, M->isVolatile());

    // Remove the instruction we're replacing.
    MD->removeInstruction(M);
    M->eraseFromParent();
    ++NumMemCpyInstr;
    return true;
}
Example #9
0
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
  if (!SI->isSimple()) return false;

  // Avoid merging nontemporal stores since the resulting
  // memcpy/memset would not be able to preserve the nontemporal hint.
  // In theory we could teach how to propagate the !nontemporal metadata to
  // memset calls. However, that change would force the backend to
  // conservatively expand !nontemporal memset calls back to sequences of
  // store instructions (effectively undoing the merging).
  if (SI->getMetadata(LLVMContext::MD_nontemporal))
    return false;

  const DataLayout &DL = SI->getModule()->getDataLayout();

  // Detect cases where we're performing call slot forwarding, but
  // happen to be using a load-store pair to implement it, rather than
  // a memcpy.
  if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
    if (LI->isSimple() && LI->hasOneUse() &&
        LI->getParent() == SI->getParent()) {
      MemDepResult ldep = MD->getDependency(LI);
      CallInst *C = nullptr;
      if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
        C = dyn_cast<CallInst>(ldep.getInst());

      if (C) {
        // Check that nothing touches the dest of the "copy" between
        // the call and the store.
        AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
        MemoryLocation StoreLoc = MemoryLocation::get(SI);
        for (BasicBlock::iterator I = --SI->getIterator(), E = C->getIterator();
             I != E; --I) {
          if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
            C = nullptr;
            break;
          }
        }
      }

      if (C) {
        unsigned storeAlign = SI->getAlignment();
        if (!storeAlign)
          storeAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
        unsigned loadAlign = LI->getAlignment();
        if (!loadAlign)
          loadAlign = DL.getABITypeAlignment(LI->getType());

        bool changed = performCallSlotOptzn(
            LI, SI->getPointerOperand()->stripPointerCasts(),
            LI->getPointerOperand()->stripPointerCasts(),
            DL.getTypeStoreSize(SI->getOperand(0)->getType()),
            std::min(storeAlign, loadAlign), C);
        if (changed) {
          MD->removeInstruction(SI);
          SI->eraseFromParent();
          MD->removeInstruction(LI);
          LI->eraseFromParent();
          ++NumMemCpyInstr;
          return true;
        }
      }
    }
  }

  // There are two cases that are interesting for this code to handle: memcpy
  // and memset.  Right now we only handle memset.

  // Ensure that the value being stored is something that can be memset'able a
  // byte at a time like "0" or "-1" or any width, as well as things like
  // 0xA0A0A0A0 and 0.0.
  if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
    if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
                                              ByteVal)) {
      BBI = I->getIterator(); // Don't invalidate iterator.
      return true;
    }

  return false;
}
bool DSE::runOnBasicBlock(BasicBlock &BB) {
  bool MadeChange = false;
  
  // Do a top-down walk on the BB.
  for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
    Instruction *Inst = BBI++;
    
    // Handle 'free' calls specially.
    if (CallInst *F = isFreeCall(Inst)) {
      MadeChange |= HandleFree(F);
      continue;
    }
    
    // If we find something that writes memory, get its memory dependence.
    if (!hasMemoryWrite(Inst))
      continue;

    MemDepResult InstDep = MD->getDependency(Inst);
    
    // Ignore non-local store liveness.
    // FIXME: cross-block DSE would be fun. :)
    if (InstDep.isNonLocal() || 
        // Ignore self dependence, which happens in the entry block of the
        // function.
        InstDep.getInst() == Inst)
      continue;
     
    // If we're storing the same value back to a pointer that we just
    // loaded from, then the store can be removed.
    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
        if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
            SI->getOperand(0) == DepLoad && !SI->isVolatile()) {
          DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n  "
                       << "LOAD: " << *DepLoad << "\n  STORE: " << *SI << '\n');
          
          // DeleteDeadInstruction can delete the current instruction.  Save BBI
          // in case we need it.
          WeakVH NextInst(BBI);
          
          DeleteDeadInstruction(SI, *MD);
          
          if (NextInst == 0)  // Next instruction deleted.
            BBI = BB.begin();
          else if (BBI != BB.begin())  // Revisit this instruction if possible.
            --BBI;
          ++NumFastStores;
          MadeChange = true;
          continue;
        }
      }
    }
    
    // Figure out what location is being stored to.
    AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA);

    // If we didn't get a useful location, fail.
    if (Loc.Ptr == 0)
      continue;
    
    while (!InstDep.isNonLocal()) {
      // Get the memory clobbered by the instruction we depend on.  MemDep will
      // skip any instructions that 'Loc' clearly doesn't interact with.  If we
      // end up depending on a may- or must-aliased load, then we can't optimize
      // away the store and we bail out.  However, if we depend on on something
      // that overwrites the memory location we *can* potentially optimize it.
      //
      // Find out what memory location the dependant instruction stores.
      Instruction *DepWrite = InstDep.getInst();
      AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA);
      // If we didn't get a useful location, or if it isn't a size, bail out.
      if (DepLoc.Ptr == 0)
        break;

      // If we find a write that is a) removable (i.e., non-volatile), b) is
      // completely obliterated by the store to 'Loc', and c) which we know that
      // 'Inst' doesn't load from, then we can remove it.
      if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, *AA) &&
          !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
        DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
              << *DepWrite << "\n  KILLER: " << *Inst << '\n');
        
        // Delete the store and now-dead instructions that feed it.
        DeleteDeadInstruction(DepWrite, *MD);
        ++NumFastStores;
        MadeChange = true;
        
        // DeleteDeadInstruction can delete the current instruction in loop
        // cases, reset BBI.
        BBI = Inst;
        if (BBI != BB.begin())
          --BBI;
        break;
      }
      
      // If this is a may-aliased store that is clobbering the store value, we
      // can keep searching past it for another must-aliased pointer that stores
      // to the same location.  For example, in:
      //   store -> P
      //   store -> Q
      //   store -> P
      // we can remove the first store to P even though we don't know if P and Q
      // alias.
      if (DepWrite == &BB.front()) break;
      
      // Can't look past this instruction if it might read 'Loc'.
      if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref)
        break;
        
      InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB);
    }
  }
  
  // If this block ends in a return, unwind, or unreachable, all allocas are
  // dead at its end, which means stores to them are also dead.
  if (BB.getTerminator()->getNumSuccessors() == 0)
    MadeChange |= handleEndBlock(BB);
  
  return MadeChange;
}
Example #11
0
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
  if (!SI->isSimple()) return false;

  // Avoid merging nontemporal stores since the resulting
  // memcpy/memset would not be able to preserve the nontemporal hint.
  // In theory we could teach how to propagate the !nontemporal metadata to
  // memset calls. However, that change would force the backend to
  // conservatively expand !nontemporal memset calls back to sequences of
  // store instructions (effectively undoing the merging).
  if (SI->getMetadata(LLVMContext::MD_nontemporal))
    return false;

  const DataLayout &DL = SI->getModule()->getDataLayout();

  // Load to store forwarding can be interpreted as memcpy.
  if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
    if (LI->isSimple() && LI->hasOneUse() &&
        LI->getParent() == SI->getParent()) {

      auto *T = LI->getType();
      if (T->isAggregateType()) {
        AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
        MemoryLocation LoadLoc = MemoryLocation::get(LI);

        // We use alias analysis to check if an instruction may store to
        // the memory we load from in between the load and the store. If
        // such an instruction is found, we store it in AI.
        Instruction *AI = nullptr;
        for (BasicBlock::iterator I = ++LI->getIterator(), E = SI->getIterator();
             I != E; ++I) {
          if (AA.getModRefInfo(&*I, LoadLoc) & MRI_Mod) {
            AI = &*I;
            break;
          }
        }

        // If no aliasing instruction is found, then we can promote the
        // load/store pair to a memcpy at the store loaction.
        if (!AI) {
          // If we load from memory that may alias the memory we store to,
          // memmove must be used to preserve semantic. If not, memcpy can
          // be used.
          bool UseMemMove = false;
          if (!AA.isNoAlias(MemoryLocation::get(SI), LoadLoc))
            UseMemMove = true;

          unsigned Align = findCommonAlignment(DL, SI, LI);
          uint64_t Size = DL.getTypeStoreSize(T);

          IRBuilder<> Builder(SI);
          Instruction *M;
          if (UseMemMove)
            M = Builder.CreateMemMove(SI->getPointerOperand(),
                                      LI->getPointerOperand(), Size,
                                      Align, SI->isVolatile());
          else
            M = Builder.CreateMemCpy(SI->getPointerOperand(),
                                     LI->getPointerOperand(), Size,
                                     Align, SI->isVolatile());

          DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI
                       << " => " << *M << "\n");

          MD->removeInstruction(SI);
          SI->eraseFromParent();
          MD->removeInstruction(LI);
          LI->eraseFromParent();
          ++NumMemCpyInstr;

          // Make sure we do not invalidate the iterator.
          BBI = M->getIterator();
          return true;
        }
      }

      // Detect cases where we're performing call slot forwarding, but
      // happen to be using a load-store pair to implement it, rather than
      // a memcpy.
      MemDepResult ldep = MD->getDependency(LI);
      CallInst *C = nullptr;
      if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
        C = dyn_cast<CallInst>(ldep.getInst());

      if (C) {
        // Check that nothing touches the dest of the "copy" between
        // the call and the store.
        AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
        MemoryLocation StoreLoc = MemoryLocation::get(SI);
        for (BasicBlock::iterator I = --SI->getIterator(), E = C->getIterator();
             I != E; --I) {
          if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
            C = nullptr;
            break;
          }
        }
      }

      if (C) {
        bool changed = performCallSlotOptzn(
            LI, SI->getPointerOperand()->stripPointerCasts(),
            LI->getPointerOperand()->stripPointerCasts(),
            DL.getTypeStoreSize(SI->getOperand(0)->getType()),
            findCommonAlignment(DL, SI, LI), C);
        if (changed) {
          MD->removeInstruction(SI);
          SI->eraseFromParent();
          MD->removeInstruction(LI);
          LI->eraseFromParent();
          ++NumMemCpyInstr;
          return true;
        }
      }
    }
  }

  // There are two cases that are interesting for this code to handle: memcpy
  // and memset.  Right now we only handle memset.

  // Ensure that the value being stored is something that can be memset'able a
  // byte at a time like "0" or "-1" or any width, as well as things like
  // 0xA0A0A0A0 and 0.0.
  if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
    if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
                                              ByteVal)) {
      BBI = I->getIterator(); // Don't invalidate iterator.
      return true;
    }

  return false;
}
Example #12
0
/// processMemCpy - perform simplification of memcpy's.  If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
/// circumstances). This allows later passes to remove the first memcpy
/// altogether.
bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
  // We can only optimize non-volatile memcpy's.
  if (M->isVolatile()) return false;

  // If the source and destination of the memcpy are the same, then zap it.
  if (M->getSource() == M->getDest()) {
    MD->removeInstruction(M);
    M->eraseFromParent();
    return false;
  }

  // If copying from a constant, try to turn the memcpy into a memset.
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
    if (GV->isConstant() && GV->hasDefinitiveInitializer())
      if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
        IRBuilder<> Builder(M);
        Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
                             M->getAlignment(), false);
        MD->removeInstruction(M);
        M->eraseFromParent();
        ++NumCpyToSet;
        return true;
      }

  // The optimizations after this point require the memcpy size.
  ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
  if (CopySize == 0) return false;

  // The are three possible optimizations we can do for memcpy:
  //   a) memcpy-memcpy xform which exposes redundance for DSE.
  //   b) call-memcpy xform for return slot optimization.
  //   c) memcpy from freshly alloca'd space copies undefined data, and we can
  //      therefore eliminate the memcpy in favor of the data that was already
  //      at the destination.
  MemDepResult DepInfo = MD->getDependency(M);
  if (DepInfo.isClobber()) {
    if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
      if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
                               CopySize->getZExtValue(), M->getAlignment(),
                               C)) {
        MD->removeInstruction(M);
        M->eraseFromParent();
        return true;
      }
    }
  }

  AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M);
  MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
                                                         M, M->getParent());
  if (SrcDepInfo.isClobber()) {
    if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
      return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
  } else if (SrcDepInfo.isDef()) {
    if (isa<AllocaInst>(SrcDepInfo.getInst())) {
      MD->removeInstruction(M);
      M->eraseFromParent();
      ++NumMemCpyInstr;
      return true;
    }
  }

  return false;
}
Example #13
0
/// processStore - When GVN is scanning forward over instructions, we look for
/// some other patterns to fold away.  In particular, this looks for stores to
/// neighboring locations of memory.  If it sees enough consequtive ones
/// (currently 4) it attempts to merge them together into a memcpy/memset.
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
  if (SI->isVolatile()) return false;
  
  TargetData *TD = getAnalysisIfAvailable<TargetData>();
  if (!TD) return false;

  // Detect cases where we're performing call slot forwarding, but
  // happen to be using a load-store pair to implement it, rather than
  // a memcpy.
  if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
    if (!LI->isVolatile() && LI->hasOneUse()) {
      MemDepResult dep = MD->getDependency(LI);
      CallInst *C = 0;
      if (dep.isClobber() && !isa<MemCpyInst>(dep.getInst()))
        C = dyn_cast<CallInst>(dep.getInst());
      
      if (C) {
        bool changed = performCallSlotOptzn(LI,
                        SI->getPointerOperand()->stripPointerCasts(), 
                        LI->getPointerOperand()->stripPointerCasts(),
                        TD->getTypeStoreSize(SI->getOperand(0)->getType()), C);
        if (changed) {
          MD->removeInstruction(SI);
          SI->eraseFromParent();
          LI->eraseFromParent();
          ++NumMemCpyInstr;
          return true;
        }
      }
    }
  }
  
  LLVMContext &Context = SI->getContext();

  // There are two cases that are interesting for this code to handle: memcpy
  // and memset.  Right now we only handle memset.
  
  // Ensure that the value being stored is something that can be memset'able a
  // byte at a time like "0" or "-1" or any width, as well as things like
  // 0xA0A0A0A0 and 0.0.
  Value *ByteVal = isBytewiseValue(SI->getOperand(0));
  if (!ByteVal)
    return false;

  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  Module *M = SI->getParent()->getParent()->getParent();

  // Okay, so we now have a single store that can be splatable.  Scan to find
  // all subsequent stores of the same value to offset from the same pointer.
  // Join these together into ranges, so we can decide whether contiguous blocks
  // are stored.
  MemsetRanges Ranges(*TD);
  
  Value *StartPtr = SI->getPointerOperand();
  
  BasicBlock::iterator BI = SI;
  for (++BI; !isa<TerminatorInst>(BI); ++BI) {
    if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) { 
      // If the call is readnone, ignore it, otherwise bail out.  We don't even
      // allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (AA.getModRefBehavior(CallSite(BI)) ==
            AliasAnalysis::DoesNotAccessMemory)
        continue;
      
      // TODO: If this is a memset, try to join it in.
      
      break;
    } else if (isa<VAArgInst>(BI) || isa<LoadInst>(BI))
      break;

    // If this is a non-store instruction it is fine, ignore it.
    StoreInst *NextStore = dyn_cast<StoreInst>(BI);
    if (NextStore == 0) continue;
    
    // If this is a store, see if we can merge it in.
    if (NextStore->isVolatile()) break;
    
    // Check to see if this stored value is of the same byte-splattable value.
    if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
      break;

    // Check to see if this store is to a constant offset from the start ptr.
    int64_t Offset;
    if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset, *TD))
      break;

    Ranges.addStore(Offset, NextStore);
  }

  // If we have no ranges, then we just had a single store with nothing that
  // could be merged in.  This is a very common case of course.
  if (Ranges.empty())
    return false;
  
  // If we had at least one store that could be merged in, add the starting
  // store as well.  We try to avoid this unless there is at least something
  // interesting as a small compile-time optimization.
  Ranges.addStore(0, SI);
  
  
  // Now that we have full information about ranges, loop over the ranges and
  // emit memset's for anything big enough to be worthwhile.
  bool MadeChange = false;
  for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
       I != E; ++I) {
    const MemsetRange &Range = *I;

    if (Range.TheStores.size() == 1) continue;
    
    // If it is profitable to lower this range to memset, do so now.
    if (!Range.isProfitableToUseMemset(*TD))
      continue;
    
    // Otherwise, we do want to transform this!  Create a new memset.  We put
    // the memset right before the first instruction that isn't part of this
    // memset block.  This ensure that the memset is dominated by any addressing
    // instruction needed by the start of the block.
    BasicBlock::iterator InsertPt = BI;

    // Get the starting pointer of the block.
    StartPtr = Range.StartPtr;

    // Determine alignment
    unsigned Alignment = Range.Alignment;
    if (Alignment == 0) {
      const Type *EltType = 
         cast<PointerType>(StartPtr->getType())->getElementType();
      Alignment = TD->getABITypeAlignment(EltType);
    }

    // Cast the start ptr to be i8* as memset requires.
    const PointerType* StartPTy = cast<PointerType>(StartPtr->getType());
    const PointerType *i8Ptr = Type::getInt8PtrTy(Context,
                                                  StartPTy->getAddressSpace());
    if (StartPTy!= i8Ptr)
      StartPtr = new BitCastInst(StartPtr, i8Ptr, StartPtr->getName(),
                                 InsertPt);

    Value *Ops[] = {
      StartPtr, ByteVal,   // Start, value
      // size
      ConstantInt::get(Type::getInt64Ty(Context), Range.End-Range.Start),
      // align
      ConstantInt::get(Type::getInt32Ty(Context), Alignment),
      // volatile
      ConstantInt::getFalse(Context),
    };
    const Type *Tys[] = { Ops[0]->getType(), Ops[2]->getType() };

    Function *MemSetF = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);

    Value *C = CallInst::Create(MemSetF, Ops, Ops+5, "", InsertPt);
    DEBUG(dbgs() << "Replace stores:\n";
          for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
            dbgs() << *Range.TheStores[i] << '\n';
          dbgs() << "With: " << *C << '\n'); C=C;
  
    // Don't invalidate the iterator
    BBI = BI;
  
    // Zap all the stores.
    for (SmallVector<StoreInst*, 16>::const_iterator
         SI = Range.TheStores.begin(),
         SE = Range.TheStores.end(); SI != SE; ++SI)
      (*SI)->eraseFromParent();
    ++NumMemSetInfer;
    MadeChange = true;
  }
  
  return MadeChange;
}
Example #14
0
/// processMemCpy - perform simplication of memcpy's.  If we have memcpy A which
/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
/// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
///  This allows later passes to remove the first memcpy altogether.
bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
  MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();

  // The are two possible optimizations we can do for memcpy:
  //   a) memcpy-memcpy xform which exposes redundance for DSE.
  //   b) call-memcpy xform for return slot optimization.
  MemDepResult dep = MD.getDependency(M);
  if (!dep.isClobber())
    return false;
  if (!isa<MemCpyInst>(dep.getInst())) {
    if (CallInst *C = dyn_cast<CallInst>(dep.getInst()))
      return performCallSlotOptzn(M, C);
    return false;
  }
  
  MemCpyInst *MDep = cast<MemCpyInst>(dep.getInst());
  
  // We can only transforms memcpy's where the dest of one is the source of the
  // other
  if (M->getSource() != MDep->getDest())
    return false;
  
  // Second, the length of the memcpy's must be the same, or the preceeding one
  // must be larger than the following one.
  ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
  ConstantInt *C2 = dyn_cast<ConstantInt>(M->getLength());
  if (!C1 || !C2)
    return false;
  
  uint64_t DepSize = C1->getValue().getZExtValue();
  uint64_t CpySize = C2->getValue().getZExtValue();
  
  if (DepSize < CpySize)
    return false;
  
  // Finally, we have to make sure that the dest of the second does not
  // alias the source of the first
  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
  if (AA.alias(M->getRawDest(), CpySize, MDep->getRawSource(), DepSize) !=
      AliasAnalysis::NoAlias)
    return false;
  else if (AA.alias(M->getRawDest(), CpySize, M->getRawSource(), CpySize) !=
           AliasAnalysis::NoAlias)
    return false;
  else if (AA.alias(MDep->getRawDest(), DepSize, MDep->getRawSource(), DepSize)
           != AliasAnalysis::NoAlias)
    return false;
  
  // If all checks passed, then we can transform these memcpy's
  const Type *ArgTys[3] = { M->getRawDest()->getType(),
                            MDep->getRawSource()->getType(),
                            M->getLength()->getType() };
  Function *MemCpyFun = Intrinsic::getDeclaration(
                                 M->getParent()->getParent()->getParent(),
                                 M->getIntrinsicID(), ArgTys, 3);
    
  Value *Args[5] = {
    M->getRawDest(), MDep->getRawSource(), M->getLength(),
    M->getAlignmentCst(), M->getVolatileCst()
  };
  
  CallInst *C = CallInst::Create(MemCpyFun, Args, Args+5, "", M);
  
  
  // If C and M don't interfere, then this is a valid transformation.  If they
  // did, this would mean that the two sources overlap, which would be bad.
  if (MD.getDependency(C) == dep) {
    MD.removeInstruction(M);
    M->eraseFromParent();
    ++NumMemCpyInstr;
    return true;
  }
  
  // Otherwise, there was no point in doing this, so we remove the call we
  // inserted and act like nothing happened.
  MD.removeInstruction(C);
  C->eraseFromParent();
  return false;
}
void DataDependence::processDepResult(Instruction *inst, 
    MemoryDependenceAnalysis &MDA, AliasAnalysis &AA) {
  // TODO: This is probably a good place to check of the dependency
  // information is calculated on-demand
  MemDepResult Res = MDA.getDependency(inst);


  if (!Res.isNonLocal()) {
    // local results (not-non-local) can be simply handled. They are just
    // a pair of insturctions and a dependency type

    // Get dependency information
    DepInfo newInfo;
    newInfo = getDepInfo(Res);

#ifdef MK_DEBUG
    //errs() << "[DEBUG] newInfo depInst == " << Res.getInst() << '\n';
    if (Res.getInst() == NULL) {
      errs() << "[DEBUG] NULL dependency found, dep type: "
             << depTypeToString(newInfo.Type_) << '\n';
    }
#endif

    // Save into map
    assert(newInfo.valid());
    LocalDeps_[inst] = newInfo;
  }
  else {
    // Handle NonLocal dependencies. The function call
    // getNonLocalPointerDependency() assumes that a result of NonLocal
    // has already been encountered
    
    // Get dependency information
    DepInfo newInfo;
    newInfo = getDepInfo(Res);

    assert(newInfo.Type_ == NonLocal);
    assert(Res.isNonLocal());

    SmallVector<NonLocalDepResult, 4> NLDep;
    if (LoadInst *LI = dyn_cast<LoadInst>(inst)) {
      if (!LI->isUnordered()) {
        // FIXME: Handle atomic/volatile loads.
        errs() << "[WARNING] atomic/volatile loads are not handled\n";
        assert(false && "atomic/volatile loads not handled");
        //Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown),
                                         //static_cast<BasicBlock *>(0)));
        return;
      }
      AliasAnalysis::Location Loc = AA.getLocation(LI);
      MDA.getNonLocalPointerDependency(Loc, true, LI->getParent(), NLDep);
    } 
    else if (StoreInst *SI = dyn_cast<StoreInst>(inst)) {
      if (!SI->isUnordered()) {
        // FIXME: Handle atomic/volatile stores.
        errs() << "[WARNING] atomic/volatile stores are not handled\n";
        assert(false && "atomic/volatile stores not handled");
        //Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown),
                                         //static_cast<BasicBlock *>(0)));
        return;
      }
      AliasAnalysis::Location Loc = AA.getLocation(SI);
      MDA.getNonLocalPointerDependency(Loc, false, SI->getParent(), 
          NLDep);
    } 
    else if (VAArgInst *VI = dyn_cast<VAArgInst>(inst)) {
      AliasAnalysis::Location Loc = AA.getLocation(VI);
      MDA.getNonLocalPointerDependency(Loc, false, VI->getParent(), 
          NLDep);
    } 
    else {
      llvm_unreachable("Unknown memory instruction!");
    }

#ifdef MK_DEBUG
    errs() << "[DEBUG] NLDep.size() == " << NLDep.size() << '\n';
#endif
    for (auto I = NLDep.begin(), E = NLDep.end(); I != E; ++I) {
      NonLocalDeps_[inst].push_back(*I);
    }
  } // end else
}
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
    if (!SI->isSimple()) return false;

    if (TD == 0) return false;

    // Detect cases where we're performing call slot forwarding, but
    // happen to be using a load-store pair to implement it, rather than
    // a memcpy.
    if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
        if (LI->isSimple() && LI->hasOneUse() &&
                LI->getParent() == SI->getParent()) {
            MemDepResult ldep = MD->getDependency(LI);
            CallInst *C = 0;
            if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
                C = dyn_cast<CallInst>(ldep.getInst());

            if (C) {
                // Check that nothing touches the dest of the "copy" between
                // the call and the store.
                AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
                AliasAnalysis::Location StoreLoc = AA.getLocation(SI);
                for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
                        E = C; I != E; --I) {
                    if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
                        C = 0;
                        break;
                    }
                }
            }

            if (C) {
                unsigned storeAlign = SI->getAlignment();
                if (!storeAlign)
                    storeAlign = TD->getABITypeAlignment(SI->getOperand(0)->getType());
                unsigned loadAlign = LI->getAlignment();
                if (!loadAlign)
                    loadAlign = TD->getABITypeAlignment(LI->getType());

                bool changed = performCallSlotOptzn(LI,
                                                    SI->getPointerOperand()->stripPointerCasts(),
                                                    LI->getPointerOperand()->stripPointerCasts(),
                                                    TD->getTypeStoreSize(SI->getOperand(0)->getType()),
                                                    std::min(storeAlign, loadAlign), C);
                if (changed) {
                    MD->removeInstruction(SI);
                    SI->eraseFromParent();
                    MD->removeInstruction(LI);
                    LI->eraseFromParent();
                    ++NumMemCpyInstr;
                    return true;
                }
            }
        }
    }

    // There are two cases that are interesting for this code to handle: memcpy
    // and memset.  Right now we only handle memset.

    // Ensure that the value being stored is something that can be memset'able a
    // byte at a time like "0" or "-1" or any width, as well as things like
    // 0xA0A0A0A0 and 0.0.
    if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
        if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
                             ByteVal)) {
            BBI = I;  // Don't invalidate iterator.
            return true;
        }

    return false;
}
Example #17
0
void DSWP::buildPDG(Loop *L) {
    //Initialize PDG
	for (Loop::block_iterator bi = L->getBlocks().begin(); bi != L->getBlocks().end(); bi++) {
		BasicBlock *BB = *bi;
		for (BasicBlock::iterator ui = BB->begin(); ui != BB->end(); ui++) {
			Instruction *inst = &(*ui);

			//standardlize the name for all expr
			if (util.hasNewDef(inst)) {
				inst->setName(util.genId());
				dname[inst] = inst->getNameStr();
			} else {
				dname[inst] = util.genId();
			}

			pdg[inst] = new vector<Edge>();
			rev[inst] = new vector<Edge>();
		}
	}

	//LoopInfo &li = getAnalysis<LoopInfo>();

	/*
	 * Memory dependency analysis
	 */
	MemoryDependenceAnalysis &mda = getAnalysis<MemoryDependenceAnalysis>();

	for (Loop::block_iterator bi = L->getBlocks().begin(); bi != L->getBlocks().end(); bi++) {
		BasicBlock *BB = *bi;
		for (BasicBlock::iterator ii = BB->begin(); ii != BB->end(); ii++) {
			Instruction *inst = &(*ii);

			//data dependence = register dependence + memory dependence

			//begin register dependence
			for (Value::use_iterator ui = ii->use_begin(); ui != ii->use_end(); ui++) {
				if (Instruction *user = dyn_cast<Instruction>(*ui)) {
					addEdge(inst, user, REG);
				}
			}
			//finish register dependence

			//begin memory dependence
			MemDepResult mdr = mda.getDependency(inst);
			//TODO not sure clobbers mean!!

			if (mdr.isDef()) {
				Instruction *dep = mdr.getInst();

				if (isa<LoadInst>(inst)) {
					if (isa<StoreInst>(dep)) {
						addEdge(dep, inst, DTRUE);	//READ AFTER WRITE
					}
				}
				if (isa<StoreInst>(inst)) {
					if (isa<LoadInst>(dep)) {
						addEdge(dep, inst, DANTI);	//WRITE AFTER READ
					}
					if (isa<StoreInst>(dep)) {
						addEdge(dep, inst, DOUT);	//WRITE AFTER WRITE
					}
				}
				//READ AFTER READ IS INSERT AFTER PDG BUILD
			}
			//end memory dependence
		}//for ii
	}//for bi

	/*
	 * begin control dependence
	 */
	PostDominatorTree &pdt = getAnalysis<PostDominatorTree>();
	//cout << pdt.getRootNode()->getBlock()->getNameStr() << endl;

	/*
	 * alien code part 1
	 */
	LoopInfo *LI = &getAnalysis<LoopInfo>();
	std::set<BranchInst*> backedgeParents;
	for (Loop::block_iterator bi = L->getBlocks().begin(); bi
			!= L->getBlocks().end(); bi++) {
		BasicBlock *BB = *bi;
		for (BasicBlock::iterator ii = BB->begin(); ii != BB->end(); ii++) {
			Instruction *inst = ii;
			if (BranchInst *brInst = dyn_cast<BranchInst>(inst)) {
				// get the loop this instruction (and therefore basic block) belongs to
				Loop *instLoop = LI->getLoopFor(BB);
				bool branchesToHeader = false;
				for (int i = brInst->getNumSuccessors() - 1; i >= 0
						&& !branchesToHeader; i--) {
					// if the branch could exit, store it
					if (LI->getLoopFor(brInst->getSuccessor(i)) != instLoop) {
						branchesToHeader = true;
					}
				}
				if (branchesToHeader) {
					backedgeParents.insert(brInst);
				}
			}
		}
	}

	//build information for predecessor of blocks in post dominator tree
	for (Function::iterator bi = func->begin(); bi != func->end(); bi++) {
		BasicBlock *BB = bi;
		DomTreeNode *dn = pdt.getNode(BB);

		for (DomTreeNode::iterator di = dn->begin(); di != dn->end(); di++) {
			BasicBlock *CB = (*di)->getBlock();
			pre[CB] = BB;
		}
	}
//
//	//add dependency within a basicblock
//	for (Loop::block_iterator bi = L->getBlocks().begin(); bi != L->getBlocks().end(); bi++) {
//		BasicBlock *BB = *bi;
//		Instruction *pre = NULL;
//		for (BasicBlock::iterator ui = BB->begin(); ui != BB->end(); ui++) {
//			Instruction *inst = &(*ui);
//			if (pre != NULL) {
//				addEdge(pre, inst, CONTROL);
//			}
//			pre = inst;
//		}
//	}

//	//the special kind of dependence need loop peeling ? I don't know whether this is needed
//	for (Loop::block_iterator bi = L->getBlocks().begin(); bi != L->getBlocks().end(); bi++) {
//		BasicBlock *BB = *bi;
//		for (succ_iterator PI = succ_begin(BB); PI != succ_end(BB); ++PI) {
//			BasicBlock *succ = *PI;
//
//			checkControlDependence(BB, succ, pdt);
//		}
//	}


	/*
	 * alien code part 2
	 */
	// add normal control dependencies
	// loop through each instruction
	for (Loop::block_iterator bbIter = L->block_begin(); bbIter
			!= L->block_end(); ++bbIter) {
		BasicBlock *bb = *bbIter;
		// check the successors of this basic block
		if (BranchInst *branchInst = dyn_cast<BranchInst>(bb->getTerminator())) {
			if (branchInst->getNumSuccessors() > 1) {
				BasicBlock * succ = branchInst->getSuccessor(0);
				// if the successor is nested shallower than the current basic block, continue
				if (LI->getLoopDepth(bb) < LI->getLoopDepth(succ)) {
					continue;
				}
				// otherwise, add all instructions to graph as control dependence
				while (succ != NULL && succ != bb && LI->getLoopDepth(succ)
						>= LI->getLoopDepth(bb)) {
					Instruction *terminator = bb->getTerminator();
					for (BasicBlock::iterator succInstIter = succ->begin(); &(*succInstIter)
							!= succ->getTerminator(); ++succInstIter) {
						addEdge(terminator, &(*succInstIter), CONTROL);
					}
					if (BranchInst *succBrInst = dyn_cast<BranchInst>(succ->getTerminator())) {
						if (succBrInst->getNumSuccessors() > 1) {
							addEdge(terminator, succ->getTerminator(),
									CONTROL);
						}
					}
					if (BranchInst *br = dyn_cast<BranchInst>(succ->getTerminator())) {
						if (br->getNumSuccessors() == 1) {
							succ = br->getSuccessor(0);
						} else {
							succ = NULL;
						}
					} else {
						succ = NULL;
					}
				}
			}
		}
	}


	/*
	 * alien code part 3
	 */
    for (std::set<BranchInst*>::iterator exitIter = backedgeParents.begin(); exitIter != backedgeParents.end(); ++exitIter) {
        BranchInst *exitBranch = *exitIter;
        if (exitBranch->isConditional()) {
            BasicBlock *header = LI->getLoopFor(exitBranch->getParent())->getHeader();
            for (BasicBlock::iterator ctrlIter = header->begin(); ctrlIter != header->end(); ++ctrlIter) {
                addEdge(exitBranch, &(*ctrlIter), CONTROL);
            }
        }
    }

	//end control dependence
}
/// processByValArgument - This is called on every byval argument in call sites.
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
    if (TD == 0) return false;

    // Find out what feeds this byval argument.
    Value *ByValArg = CS.getArgument(ArgNo);
    Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
    uint64_t ByValSize = TD->getTypeAllocSize(ByValTy);
    MemDepResult DepInfo =
        MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize),
                                     true, CS.getInstruction(),
                                     CS.getInstruction()->getParent());
    if (!DepInfo.isClobber())
        return false;

    // If the byval argument isn't fed by a memcpy, ignore it.  If it is fed by
    // a memcpy, see if we can byval from the source of the memcpy instead of the
    // result.
    MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
    if (MDep == 0 || MDep->isVolatile() ||
            ByValArg->stripPointerCasts() != MDep->getDest())
        return false;

    // The length of the memcpy must be larger or equal to the size of the byval.
    ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
    if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
        return false;

    // Get the alignment of the byval.  If the call doesn't specify the alignment,
    // then it is some target specific value that we can't know.
    unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
    if (ByValAlign == 0) return false;

    // If it is greater than the memcpy, then we check to see if we can force the
    // source of the memcpy to the alignment we need.  If we fail, we bail out.
    if (MDep->getAlignment() < ByValAlign &&
            getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign)
        return false;

    // Verify that the copied-from memory doesn't change in between the memcpy and
    // the byval call.
    //    memcpy(a <- b)
    //    *b = 42;
    //    foo(*a)
    // It would be invalid to transform the second memcpy into foo(*b).
    //
    // NOTE: This is conservative, it will stop on any read from the source loc,
    // not just the defining memcpy.
    MemDepResult SourceDep =
        MD->getPointerDependencyFrom(AliasAnalysis::getLocationForSource(MDep),
                                     false, CS.getInstruction(), MDep->getParent());
    if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
        return false;

    Value *TmpCast = MDep->getSource();
    if (MDep->getSource()->getType() != ByValArg->getType())
        TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
                                  "tmpcast", CS.getInstruction());

    DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n"
          << "  " << *MDep << "\n"
          << "  " << *CS.getInstruction() << "\n");

    // Otherwise we're good!  Update the byval argument.
    CS.setArgument(ArgNo, TmpCast);
    ++NumMemCpyInstr;
    return true;
}
Example #19
0
bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  // TODO: Could probably handle variadic functions.
  if (F.isVarArg() || F.hasStructRetAttr() ||
      AMDGPU::isEntryFunctionCC(F.getCallingConv()))
    return false;

  MDA = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();

  unsigned ReturnNumRegs = 0;
  SmallSet<int, 4> OutArgIndexes;
  SmallVector<Type *, 4> ReturnTypes;
  Type *RetTy = F.getReturnType();
  if (!RetTy->isVoidTy()) {
    ReturnNumRegs = DL->getTypeStoreSize(RetTy) / 4;

    if (ReturnNumRegs >= MaxNumRetRegs)
      return false;

    ReturnTypes.push_back(RetTy);
  }

  SmallVector<Argument *, 4> OutArgs;
  for (Argument &Arg : F.args()) {
    if (isOutArgumentCandidate(Arg)) {
      LLVM_DEBUG(dbgs() << "Found possible out argument " << Arg
                        << " in function " << F.getName() << '\n');
      OutArgs.push_back(&Arg);
    }
  }

  if (OutArgs.empty())
    return false;

  using ReplacementVec = SmallVector<std::pair<Argument *, Value *>, 4>;

  DenseMap<ReturnInst *, ReplacementVec> Replacements;

  SmallVector<ReturnInst *, 4> Returns;
  for (BasicBlock &BB : F) {
    if (ReturnInst *RI = dyn_cast<ReturnInst>(&BB.back()))
      Returns.push_back(RI);
  }

  if (Returns.empty())
    return false;

  bool Changing;

  do {
    Changing = false;

    // Keep retrying if we are able to successfully eliminate an argument. This
    // helps with cases with multiple arguments which may alias, such as in a
    // sincos implemntation. If we have 2 stores to arguments, on the first
    // attempt the MDA query will succeed for the second store but not the
    // first. On the second iteration we've removed that out clobbering argument
    // (by effectively moving it into another function) and will find the second
    // argument is OK to move.
    for (Argument *OutArg : OutArgs) {
      bool ThisReplaceable = true;
      SmallVector<std::pair<ReturnInst *, StoreInst *>, 4> ReplaceableStores;

      Type *ArgTy = OutArg->getType()->getPointerElementType();

      // Skip this argument if converting it will push us over the register
      // count to return limit.

      // TODO: This is an approximation. When legalized this could be more. We
      // can ask TLI for exactly how many.
      unsigned ArgNumRegs = DL->getTypeStoreSize(ArgTy) / 4;
      if (ArgNumRegs + ReturnNumRegs > MaxNumRetRegs)
        continue;

      // An argument is convertible only if all exit blocks are able to replace
      // it.
      for (ReturnInst *RI : Returns) {
        BasicBlock *BB = RI->getParent();

        MemDepResult Q = MDA->getPointerDependencyFrom(MemoryLocation(OutArg),
                                                       true, BB->end(), BB, RI);
        StoreInst *SI = nullptr;
        if (Q.isDef())
          SI = dyn_cast<StoreInst>(Q.getInst());

        if (SI) {
          LLVM_DEBUG(dbgs() << "Found out argument store: " << *SI << '\n');
          ReplaceableStores.emplace_back(RI, SI);
        } else {
          ThisReplaceable = false;
          break;
        }
      }

      if (!ThisReplaceable)
        continue; // Try the next argument candidate.

      for (std::pair<ReturnInst *, StoreInst *> Store : ReplaceableStores) {
        Value *ReplVal = Store.second->getValueOperand();

        auto &ValVec = Replacements[Store.first];
        if (llvm::find_if(ValVec,
              [OutArg](const std::pair<Argument *, Value *> &Entry) {
                 return Entry.first == OutArg;}) != ValVec.end()) {
          LLVM_DEBUG(dbgs()
                     << "Saw multiple out arg stores" << *OutArg << '\n');
          // It is possible to see stores to the same argument multiple times,
          // but we expect these would have been optimized out already.
          ThisReplaceable = false;
          break;
        }

        ValVec.emplace_back(OutArg, ReplVal);
        Store.second->eraseFromParent();
      }

      if (ThisReplaceable) {
        ReturnTypes.push_back(ArgTy);
        OutArgIndexes.insert(OutArg->getArgNo());
        ++NumOutArgumentsReplaced;
        Changing = true;
      }
    }
  } while (Changing);

  if (Replacements.empty())
    return false;

  LLVMContext &Ctx = F.getParent()->getContext();
  StructType *NewRetTy = StructType::create(Ctx, ReturnTypes, F.getName());

  FunctionType *NewFuncTy = FunctionType::get(NewRetTy,
                                              F.getFunctionType()->params(),
                                              F.isVarArg());

  LLVM_DEBUG(dbgs() << "Computed new return type: " << *NewRetTy << '\n');

  Function *NewFunc = Function::Create(NewFuncTy, Function::PrivateLinkage,
                                       F.getName() + ".body");
  F.getParent()->getFunctionList().insert(F.getIterator(), NewFunc);
  NewFunc->copyAttributesFrom(&F);
  NewFunc->setComdat(F.getComdat());

  // We want to preserve the function and param attributes, but need to strip
  // off any return attributes, e.g. zeroext doesn't make sense with a struct.
  NewFunc->stealArgumentListFrom(F);

  AttrBuilder RetAttrs;
  RetAttrs.addAttribute(Attribute::SExt);
  RetAttrs.addAttribute(Attribute::ZExt);
  RetAttrs.addAttribute(Attribute::NoAlias);
  NewFunc->removeAttributes(AttributeList::ReturnIndex, RetAttrs);
  // TODO: How to preserve metadata?

  // Move the body of the function into the new rewritten function, and replace
  // this function with a stub.
  NewFunc->getBasicBlockList().splice(NewFunc->begin(), F.getBasicBlockList());

  for (std::pair<ReturnInst *, ReplacementVec> &Replacement : Replacements) {
    ReturnInst *RI = Replacement.first;
    IRBuilder<> B(RI);
    B.SetCurrentDebugLocation(RI->getDebugLoc());

    int RetIdx = 0;
    Value *NewRetVal = UndefValue::get(NewRetTy);

    Value *RetVal = RI->getReturnValue();
    if (RetVal)
      NewRetVal = B.CreateInsertValue(NewRetVal, RetVal, RetIdx++);

    for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second) {
      Argument *Arg = ReturnPoint.first;
      Value *Val = ReturnPoint.second;
      Type *EltTy = Arg->getType()->getPointerElementType();
      if (Val->getType() != EltTy) {
        Type *EffectiveEltTy = EltTy;
        if (StructType *CT = dyn_cast<StructType>(EltTy)) {
          assert(CT->getNumElements() == 1);
          EffectiveEltTy = CT->getElementType(0);
        }

        if (DL->getTypeSizeInBits(EffectiveEltTy) !=
            DL->getTypeSizeInBits(Val->getType())) {
          assert(isVec3ToVec4Shuffle(EffectiveEltTy, Val->getType()));
          Val = B.CreateShuffleVector(Val, UndefValue::get(Val->getType()),
                                      { 0, 1, 2 });
        }

        Val = B.CreateBitCast(Val, EffectiveEltTy);

        // Re-create single element composite.
        if (EltTy != EffectiveEltTy)
          Val = B.CreateInsertValue(UndefValue::get(EltTy), Val, 0);
      }

      NewRetVal = B.CreateInsertValue(NewRetVal, Val, RetIdx++);
    }

    if (RetVal)
      RI->setOperand(0, NewRetVal);
    else {
      B.CreateRet(NewRetVal);
      RI->eraseFromParent();
    }
  }

  SmallVector<Value *, 16> StubCallArgs;
  for (Argument &Arg : F.args()) {
    if (OutArgIndexes.count(Arg.getArgNo())) {
      // It's easier to preserve the type of the argument list. We rely on
      // DeadArgumentElimination to take care of these.
      StubCallArgs.push_back(UndefValue::get(Arg.getType()));
    } else {
      StubCallArgs.push_back(&Arg);
    }
  }

  BasicBlock *StubBB = BasicBlock::Create(Ctx, "", &F);
  IRBuilder<> B(StubBB);
  CallInst *StubCall = B.CreateCall(NewFunc, StubCallArgs);

  int RetIdx = RetTy->isVoidTy() ? 0 : 1;
  for (Argument &Arg : F.args()) {
    if (!OutArgIndexes.count(Arg.getArgNo()))
      continue;

    PointerType *ArgType = cast<PointerType>(Arg.getType());

    auto *EltTy = ArgType->getElementType();
    unsigned Align = Arg.getParamAlignment();
    if (Align == 0)
      Align = DL->getABITypeAlignment(EltTy);

    Value *Val = B.CreateExtractValue(StubCall, RetIdx++);
    Type *PtrTy = Val->getType()->getPointerTo(ArgType->getAddressSpace());

    // We can peek through bitcasts, so the type may not match.
    Value *PtrVal = B.CreateBitCast(&Arg, PtrTy);

    B.CreateAlignedStore(Val, PtrVal, Align);
  }

  if (!RetTy->isVoidTy()) {
    B.CreateRet(B.CreateExtractValue(StubCall, 0));
  } else {
    B.CreateRetVoid();
  }

  // The function is now a stub we want to inline.
  F.addFnAttr(Attribute::AlwaysInline);

  ++NumOutArgumentFunctionsReplaced;
  return true;
}