Example #1
  ~MMIAddrLabelMap() {
    assert(DeletedAddrLabelsNeedingEmission.empty() &&
           "Some labels for deleted blocks never got emitted");

    // Deallocate any of the 'list of symbols' case.
    for (DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry>::iterator
         I = AddrLabelSymbols.begin(), E = AddrLabelSymbols.end(); I != E; ++I)
      if (I->second.Symbols.is<std::vector<MCSymbol*>*>())
        delete I->second.Symbols.get<std::vector<MCSymbol*>*>();
  }
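
The destructor above owns only one alternative of a PointerUnion stored as the DenseMap value: a lone MCSymbol is managed elsewhere, but a heap-allocated std::vector<MCSymbol*> must be deleted by the map itself. A minimal stand-alone sketch of that ownership pattern, using std::variant in place of llvm::PointerUnion and hypothetical Sym/SymMap names:

#include <map>
#include <variant>
#include <vector>

struct Sym {}; // stand-in for MCSymbol; owned elsewhere

class SymMap {
  // Each entry holds either a single externally owned symbol or a
  // heap-allocated vector of symbols that this map must free itself.
  using Entry = std::variant<Sym *, std::vector<Sym *> *>;
  std::map<int, Entry> Entries;

public:
  ~SymMap() {
    for (auto &KV : Entries)
      if (auto *Vec = std::get_if<std::vector<Sym *> *>(&KV.second))
        delete *Vec; // only the 'list of symbols' case is owned here
  }
};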
Example #2
bool FuncletLayout::runOnMachineFunction(MachineFunction &F) {
  DenseMap<const MachineBasicBlock *, int> FuncletMembership =
      getFuncletMembership(F);
  if (FuncletMembership.empty())
    return false;

  F.sort([&](MachineBasicBlock &X, MachineBasicBlock &Y) {
    auto FuncletX = FuncletMembership.find(&X);
    auto FuncletY = FuncletMembership.find(&Y);
    assert(FuncletX != FuncletMembership.end());
    assert(FuncletY != FuncletMembership.end());
    return FuncletX->second < FuncletY->second;
  });

  // Conservatively assume we changed something.
  return true;
}
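
FuncletLayout sorts blocks by a rank that was computed once up front, so the comparator is just a map lookup. A stand-alone sketch of the same idea with hypothetical Block and Rank names; std::stable_sort keeps blocks of equal rank in their original relative order, and at() mirrors the asserts above by failing loudly on a missing entry:

#include <algorithm>
#include <unordered_map>
#include <vector>

struct Block { int ID; };

// Reorder Blocks by a precomputed rank; ties keep their original order.
void sortByRank(std::vector<Block *> &Blocks,
                const std::unordered_map<Block *, int> &Rank) {
  std::stable_sort(Blocks.begin(), Blocks.end(), [&](Block *X, Block *Y) {
    return Rank.at(X) < Rank.at(Y); // at() throws if a block has no rank
  });
}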
Example #3
bool JumpInstrTables::runOnModule(Module &M) {
  JITI = &getAnalysis<JumpInstrTableInfo>();

  // Get the set of jumptable-annotated functions.
  DenseMap<Function *, Function *> Functions;
  for (Function &F : M) {
    if (F.hasFnAttribute(Attribute::JumpTable)) {
      assert(F.hasUnnamedAddr() &&
             "Attribute 'jumptable' requires 'unnamed_addr'");
      Functions[&F] = nullptr;
    }
  }

  // Create the jump-table functions.
  for (auto &KV : Functions) {
    Function *F = KV.first;
    KV.second = insertEntry(M, F);
  }

  // GlobalAlias is a special case, because the target of an alias statement
  // must be a defined function. So, instead of replacing a given function in
  // the alias, we replace all uses of aliases that target jumptable functions.
  // Note that there's no need to create these functions, since only aliases
  // that target known jumptable functions are replaced, and there's no way to
  // put the jumptable annotation on a global alias.
  DenseMap<GlobalAlias *, Function *> Aliases;
  for (GlobalAlias &GA : M.aliases()) {
    Constant *Aliasee = GA.getAliasee();
    if (Function *F = dyn_cast<Function>(Aliasee)) {
      auto it = Functions.find(F);
      if (it != Functions.end()) {
        Aliases[&GA] = it->second;
      }
    }
  }

  // Replace each address taken function with its jump-instruction table entry.
  for (auto &KV : Functions)
    replaceValueWithFunction(KV.first, KV.second);

  for (auto &KV : Aliases)
    replaceValueWithFunction(KV.first, KV.second);

  return !Functions.empty();
}
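
Note the two-phase shape of runOnModule: the first loop over M only records keys, and insertEntry, which creates new functions in M, runs in a second loop over the map, so the module is never mutated while it is being iterated. A reduced sketch of that collect-then-transform pattern, with hypothetical names:

#include <unordered_map>
#include <vector>

struct Item {};

// Phase 1 gathers candidates while iteration is safe; phase 2 performs
// the mutation, which may grow Container without invalidating anything.
std::unordered_map<Item *, Item *>
collectThenTransform(std::vector<Item *> &Container,
                     Item *(*makeReplacement)(Item *)) {
  std::unordered_map<Item *, Item *> Mapping;
  for (Item *I : Container) // phase 1: read-only walk
    Mapping[I] = nullptr;
  for (auto &KV : Mapping)  // phase 2: Container no longer being iterated
    KV.second = makeReplacement(KV.first);
  return Mapping;
}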
Example #4
static void ComputeNumbering(Function *F,
                             DenseMap<Value *, unsigned> &Numbering) {
  unsigned IN = 0;

  // Arguments get the first numbers.
  for (Function::arg_iterator
         AI = F->arg_begin(), AE = F->arg_end(); AI != AE; ++AI)
    if (!AI->hasName())
      Numbering[&*AI] = IN++;

  // Walk the basic blocks in order.
  for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) {
    if (!FI->hasName())
      Numbering[&*FI] = IN++;

    // Walk the instructions in order.
    for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI)
      // void instructions don't get numbers.
      if (!BI->hasName() && !BI->getType()->isVoidTy())
        Numbering[&*BI] = IN++;
  }

  assert(!Numbering.empty() && "asked for numbering but numbering was no-op");
}
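
ComputeNumbering mirrors the asm writer's slot numbering: arguments first, then each block and its non-void instructions, with anything that already has a name skipped. The same counter discipline in a toy, container-only sketch (Node is a hypothetical stand-in for a value):

#include <string>
#include <unordered_map>
#include <vector>

struct Node {
  std::string Name;          // empty means "unnamed"
  bool ProducesValue = true; // false models a void instruction
};

// Hand out 0, 1, 2, ... to unnamed, value-producing nodes, in order.
std::unordered_map<const Node *, unsigned>
numberUnnamed(const std::vector<Node> &Nodes) {
  std::unordered_map<const Node *, unsigned> Numbering;
  unsigned IN = 0;
  for (const Node &N : Nodes)
    if (N.Name.empty() && N.ProducesValue)
      Numbering[&N] = IN++;
  return Numbering;
}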
Example #5
/// getNonLocalPointerDepFromBB - Perform a dependency query based on
/// pointer/pointeesize starting at the end of StartBB.  Add any clobber/def
/// results to the results vector and keep track of which blocks are visited in
/// 'Visited'.
///
/// This has special behavior for the first block queries (when SkipFirstBlock
/// is true).  In this special case, it ignores the contents of the specified
/// block and starts returning dependence info for its predecessors.
///
/// This function returns false on success, or true to indicate that it could
/// not compute dependence information for some reason.  This should be treated
/// as a clobber dependence on the first instruction in the predecessor block.
bool MemoryDependenceAnalysis::
getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
                            bool isLoad, BasicBlock *StartBB,
                            SmallVectorImpl<NonLocalDepEntry> &Result,
                            DenseMap<BasicBlock*, Value*> &Visited,
                            bool SkipFirstBlock) {
  
  // Look up the cached info for Pointer.
  ValueIsLoadPair CacheKey(Pointer, isLoad);
  
  std::pair<BBSkipFirstBlockPair, NonLocalDepInfo> *CacheInfo =
    &NonLocalPointerDeps[CacheKey];
  NonLocalDepInfo *Cache = &CacheInfo->second;

  // If we have valid cached information for exactly the block we are
  // investigating, just return it with no recomputation.
  if (CacheInfo->first == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
    // If we have a fully cached result for this query, we can just return
    // the cached results and populate the visited set.  However, we have to
    // verify that we don't already have conflicting results for these
    // blocks.  Check to ensure that if a block in the results set is in the
    // visited set, it was visited for the same pointer query.
    if (!Visited.empty()) {
      for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
           I != E; ++I) {
        DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->first);
        if (VI == Visited.end() || VI->second == Pointer) continue;
        
        // We have a pointer mismatch in a block.  Just return clobber, saying
        // that something was clobbered in this result.  We could also do a
        // non-fully cached query, but there is little point in doing this.
        return true;
      }
    }
    
    for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
         I != E; ++I) {
      Visited.insert(std::make_pair(I->first, Pointer));
      if (!I->second.isNonLocal())
        Result.push_back(*I);
    }
    ++NumCacheCompleteNonLocalPtr;
    return false;
  }
  
  // Otherwise, this is either a new block, a block with an invalid cache
  // entry, or a block whose cache entry we are about to invalidate by adding
  // more information to it than its valid cache info held.  If the cache is
  // currently empty, the result we compute will be valid cache info;
  // otherwise it won't be.
  if (Cache->empty())
    CacheInfo->first = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
  else
    CacheInfo->first = BBSkipFirstBlockPair();
  
  SmallVector<BasicBlock*, 32> Worklist;
  Worklist.push_back(StartBB);
  
  // Keep track of the entries that we know are sorted.  Previously cached
  // entries will all be sorted.  The entries we add we only sort on demand (we
  // don't insert every element into its sorted position).  We know that we
  // won't get any reuse from currently inserted values, because we don't
  // revisit blocks after we insert info for them.
  unsigned NumSortedEntries = Cache->size();
  DEBUG(AssertSorted(*Cache));
  
  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();
    
    // Skip the first block if we have it.
    if (!SkipFirstBlock) {
      // Analyze the dependency of *Pointer in BB.  See if we have already
      // been here.
      assert(Visited.count(BB) && "Should check 'visited' before adding to WL");

      // Get the dependency info for Pointer in BB.  If we have cached
      // information, we will use it, otherwise we compute it.
      DEBUG(AssertSorted(*Cache, NumSortedEntries));
      MemDepResult Dep = GetNonLocalInfoForBlock(Pointer, PointeeSize, isLoad,
                                                 BB, Cache, NumSortedEntries);
      
      // If we got a Def or Clobber, add this to the list of results.
      if (!Dep.isNonLocal()) {
        Result.push_back(NonLocalDepEntry(BB, Dep));
        continue;
      }
    }
    
    // If 'Pointer' is an instruction defined in this block, then we need to do
    // phi translation to change it into a value live in the predecessor block.
    // If phi translation fails, then we can't continue dependence analysis.
    Instruction *PtrInst = dyn_cast<Instruction>(Pointer);
    bool NeedsPHITranslation = PtrInst && PtrInst->getParent() == BB;
    
    // If no PHI translation is needed, just add all the predecessors of this
    // block to scan them as well.
    if (!NeedsPHITranslation) {
      SkipFirstBlock = false;
      for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
        // Verify that we haven't looked at this block yet.
        std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
          InsertRes = Visited.insert(std::make_pair(*PI, Pointer));
        if (InsertRes.second) {
          // First time we've looked at *PI.
          Worklist.push_back(*PI);
          continue;
        }
        
        // If we have seen this block before, but it was with a different
        // pointer then we have a phi translation failure and we have to treat
        // this as a clobber.
        if (InsertRes.first->second != Pointer)
          goto PredTranslationFailure;
      }
      continue;
    }
    
    // If we do need to do phi translation, then there are a bunch of different
    // cases, because we have to find a Value* live in the predecessor block. We
    // know that PtrInst is defined in this block at least.
    
    // If this is directly a PHI node, just use the incoming values for each
    // pred as the phi translated version.
    if (PHINode *PtrPHI = dyn_cast<PHINode>(PtrInst)) {
      for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
        BasicBlock *Pred = *PI;
        Value *PredPtr = PtrPHI->getIncomingValueForBlock(Pred);
        
        // Check to see if we have already visited this pred block with another
        // pointer.  If so, we can't do this lookup.  This failure can occur
        // with PHI translation when a critical edge exists and the PHI node in
        // the successor translates to a pointer value different than the
        // pointer the block was first analyzed with.
        std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
          InsertRes = Visited.insert(std::make_pair(Pred, PredPtr));

        if (!InsertRes.second) {
          // If the predecessor was visited with PredPtr, then we already did
          // the analysis and can ignore it.
          if (InsertRes.first->second == PredPtr)
            continue;
          
          // Otherwise, the block was previously analyzed with a different
          // pointer.  We can't represent the result of this case, so we just
          // treat this as a phi translation failure.
          goto PredTranslationFailure;
        }

        // We may have added values to the cache list before this PHI
        // translation.  If so, we haven't done anything to ensure that the
        // cache remains sorted.  Sort it now (if needed) so that recursive
        // invocations of getNonLocalPointerDepFromBB that could reuse the cache
        // value will only see properly sorted cache arrays.
        if (Cache && NumSortedEntries != Cache->size())
          std::sort(Cache->begin(), Cache->end());
        Cache = nullptr;
        
        // FIXME: it is entirely possible that PHI translating will end up with
        // the same value.  Consider PHI translating something like:
        // X = phi [x, bb1], [y, bb2].  PHI translating for bb1 doesn't *need*
        // to recurse here, pedantically speaking.
        
        // If we have a problem phi translating, fall through to the code below
        // to handle the failure condition.
        if (getNonLocalPointerDepFromBB(PredPtr, PointeeSize, isLoad, Pred,
                                        Result, Visited))
          goto PredTranslationFailure;
      }

      // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
      CacheInfo = &NonLocalPointerDeps[CacheKey];
      Cache = &CacheInfo->second;
      NumSortedEntries = Cache->size();
      
      // Since we did phi translation, the "Cache" set won't contain all of the
      // results for the query.  This is ok (we can still use it to accelerate
      // specific block queries) but we can't do the fastpath "return all
      // results from the set".  Clear out the indicator for this.
      CacheInfo->first = BBSkipFirstBlockPair();
      SkipFirstBlock = false;
      continue;
    }
    
    // TODO: BITCAST, GEP.
    
    //   cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
    //   if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
    //     cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
  PredTranslationFailure:
    
    if (Cache == nullptr) {
      // Refresh the CacheInfo/Cache pointer if it got invalidated.
      CacheInfo = &NonLocalPointerDeps[CacheKey];
      Cache = &CacheInfo->second;
      NumSortedEntries = Cache->size();
    } else if (NumSortedEntries != Cache->size()) {
      std::sort(Cache->begin(), Cache->end());
      NumSortedEntries = Cache->size();
    }

    // Since we did phi translation, the "Cache" set won't contain all of the
    // results for the query.  This is ok (we can still use it to accelerate
    // specific block queries) but we can't do the fastpath "return all
    // results from the set".  Clear out the indicator for this.
    CacheInfo->first = BBSkipFirstBlockPair();
    
    // If *nothing* works, mark the pointer as being clobbered by the first
    // instruction in this block.
    //
    // If this is the magic first block, return this as a clobber of the whole
    // incoming value.  Since we can't phi translate to one of the predecessors,
    // we have to bail out.
    if (SkipFirstBlock)
      return true;
    
    for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
      assert(I != Cache->rend() && "Didn't find current block??");
      if (I->first != BB)
        continue;
      
      assert(I->second.isNonLocal() &&
             "Should only be here with transparent block");
      I->second = MemDepResult::getClobber(BB->begin());
      ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey.getOpaqueValue());
      Result.push_back(*I);
      break;
    }
  }

  // Okay, we're done now.  If we added new values to the cache, re-sort it.
  switch (Cache->size() - NumSortedEntries) {
  case 0:
    // done, no new entries.
    break;
  case 2: {
    // Two new entries, insert the last one into place.
    NonLocalDepEntry Val = Cache->back();
    Cache->pop_back();
    NonLocalDepInfo::iterator Entry =
      std::upper_bound(Cache->begin(), Cache->end()-1, Val);
    Cache->insert(Entry, Val);
    // FALL THROUGH.
  }
  case 1:
    // One new entry, just insert the new value at the appropriate position.
    if (Cache->size() != 1) {
      NonLocalDepEntry Val = Cache->back();
      Cache->pop_back();
      NonLocalDepInfo::iterator Entry =
        std::upper_bound(Cache->begin(), Cache->end(), Val);
      Cache->insert(Entry, Val);
    }
    break;
  default:
    // Added many values, do a full scale sort.
    std::sort(Cache->begin(), Cache->end());
  }
  DEBUG(AssertSorted(*Cache));
  return false;
}
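
The closing switch is a small amortization trick worth isolating: when only one or two entries were appended to an otherwise sorted cache, each straggler is moved into place with a binary search instead of re-sorting the whole vector. A stand-alone sketch of that logic, assuming the element type orders with operator<:

#include <algorithm>
#include <vector>

// Restore sortedness after appending entries to a sorted prefix.
template <typename T>
void resortAppended(std::vector<T> &Cache, unsigned NumSortedEntries) {
  switch (Cache.size() - NumSortedEntries) {
  case 0:
    break; // nothing appended, still sorted
  case 2: {
    // Place the last entry among everything except the other new entry.
    T Val = Cache.back();
    Cache.pop_back();
    Cache.insert(std::upper_bound(Cache.begin(), Cache.end() - 1, Val), Val);
    // deliberate fallthrough: one unsorted entry remains
  }
  case 1: {
    T Val = Cache.back();
    Cache.pop_back();
    Cache.insert(std::upper_bound(Cache.begin(), Cache.end(), Val), Val);
    break;
  }
  default:
    std::sort(Cache.begin(), Cache.end()); // many new entries: full sort
  }
}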
Example #6
  ~MMIAddrLabelMap() {
    assert(DeletedAddrLabelsNeedingEmission.empty() &&
           "Some labels for deleted blocks never got emitted");
  }
Example #7
bool DevirtModule::run() {
  Function *BitSetTestFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::bitset_test));
  if (!BitSetTestFunc || BitSetTestFunc->use_empty())
    return false;

  Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));
  if (!AssumeFunc || AssumeFunc->use_empty())
    return false;

  // Find all virtual calls via a virtual table pointer %p under an assumption
  // of the form llvm.assume(llvm.bitset.test(%p, %md)). This indicates that %p
  // points to a vtable in the bitset %md. Group calls by (bitset, offset) pair
  // (effectively the identity of the virtual function) and store to CallSlots.
  DenseSet<Value *> SeenPtrs;
  for (auto I = BitSetTestFunc->use_begin(), E = BitSetTestFunc->use_end();
       I != E;) {
    auto CI = dyn_cast<CallInst>(I->getUser());
    ++I;
    if (!CI)
      continue;

    // Find llvm.assume intrinsics for this llvm.bitset.test call.
    SmallVector<CallInst *, 1> Assumes;
    for (const Use &CIU : CI->uses()) {
      auto AssumeCI = dyn_cast<CallInst>(CIU.getUser());
      if (AssumeCI && AssumeCI->getCalledValue() == AssumeFunc)
        Assumes.push_back(AssumeCI);
    }

    // If we found any, search for virtual calls based on %p and add them to
    // CallSlots.
    if (!Assumes.empty()) {
      Metadata *BitSet =
          cast<MetadataAsValue>(CI->getArgOperand(1))->getMetadata();
      Value *Ptr = CI->getArgOperand(0)->stripPointerCasts();
      if (SeenPtrs.insert(Ptr).second)
        findLoadCallsAtConstantOffset(BitSet, Ptr, 0, CI->getArgOperand(0));
    }

    // We no longer need the assumes or the bitset test.
    for (auto Assume : Assumes)
      Assume->eraseFromParent();
    // We can't use RecursivelyDeleteTriviallyDeadInstructions here because we
    // may use the vtable argument later.
    if (CI->use_empty())
      CI->eraseFromParent();
  }

  // Rebuild llvm.bitsets metadata into a map for easy lookup.
  std::vector<VTableBits> Bits;
  DenseMap<Metadata *, std::set<BitSetInfo>> BitSets;
  buildBitSets(Bits, BitSets);
  if (BitSets.empty())
    return true;

  // For each (bitset, offset) pair:
  bool DidVirtualConstProp = false;
  for (auto &S : CallSlots) {
    // Search each of the vtables in the bitset for the virtual function
    // implementation at offset S.first.ByteOffset, and add to TargetsForSlot.
    std::vector<VirtualCallTarget> TargetsForSlot;
    if (!tryFindVirtualCallTargets(TargetsForSlot, BitSets[S.first.BitSetID],
                                   S.first.ByteOffset))
      continue;

    if (trySingleImplDevirt(TargetsForSlot, S.second))
      continue;

    DidVirtualConstProp |= tryVirtualConstProp(TargetsForSlot, S.second);
  }

  // Rebuild each global we touched as part of virtual constant propagation to
  // include the before and after bytes.
  if (DidVirtualConstProp)
    for (VTableBits &B : Bits)
      rebuildGlobal(B);

  return true;
}
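
One detail in the use-list walk above: the iterator is advanced before the loop body runs, because the body may erase CI (and with it the use currently being visited). The same increment-before-erase discipline in a reduced std::list form:

#include <list>

// Erase elements while walking: step the iterator past the current
// element before deciding whether to erase it.
void eraseMatching(std::list<int> &L, int Doomed) {
  for (auto I = L.begin(), E = L.end(); I != E;) {
    auto Cur = I++; // I is already past *Cur, so erasing Cur is safe
    if (*Cur == Doomed)
      L.erase(Cur);
  }
}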
Example #8
bool DevirtModule::run() {
  Function *TypeTestFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::type_test));
  Function *TypeCheckedLoadFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
  Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));

  if ((!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
       AssumeFunc->use_empty()) &&
      (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
    return false;

  if (TypeTestFunc && AssumeFunc)
    scanTypeTestUsers(TypeTestFunc, AssumeFunc);

  if (TypeCheckedLoadFunc)
    scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);

  // Rebuild type metadata into a map for easy lookup.
  std::vector<VTableBits> Bits;
  DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
  buildTypeIdentifierMap(Bits, TypeIdMap);
  if (TypeIdMap.empty())
    return true;

  // For each (type, offset) pair:
  bool DidVirtualConstProp = false;
  std::map<std::string, Function*> DevirtTargets;
  for (auto &S : CallSlots) {
    // Search each of the members of the type identifier for the virtual
    // function implementation at offset S.first.ByteOffset, and add to
    // TargetsForSlot.
    std::vector<VirtualCallTarget> TargetsForSlot;
    if (!tryFindVirtualCallTargets(TargetsForSlot, TypeIdMap[S.first.TypeID],
                                   S.first.ByteOffset))
      continue;

    if (!trySingleImplDevirt(TargetsForSlot, S.second) &&
        tryVirtualConstProp(TargetsForSlot, S.second))
      DidVirtualConstProp = true;

    // Collect functions devirtualized at least for one call site for stats.
    if (RemarksEnabled)
      for (const auto &T : TargetsForSlot)
        if (T.WasDevirt)
          DevirtTargets[T.Fn->getName()] = T.Fn;
  }

  if (RemarksEnabled) {
    // Generate remarks for each devirtualized function.
    for (const auto &DT : DevirtTargets) {
      Function *F = DT.second;
      DISubprogram *SP = F->getSubprogram();
      DebugLoc DL = SP ? DebugLoc::get(SP->getScopeLine(), 0, SP) : DebugLoc();
      emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, DL,
                             Twine("devirtualized ") + F->getName());
    }
  }

  // If we were able to eliminate all unsafe uses for a type checked load,
  // eliminate the type test by replacing it with true.
  if (TypeCheckedLoadFunc) {
    auto True = ConstantInt::getTrue(M.getContext());
    for (auto &&U : NumUnsafeUsesForTypeTest) {
      if (U.second == 0) {
        U.first->replaceAllUsesWith(True);
        U.first->eraseFromParent();
      }
    }
  }

  // Rebuild each global we touched as part of virtual constant propagation to
  // include the before and after bytes.
  if (DidVirtualConstProp)
    for (VTableBits &B : Bits)
      rebuildGlobal(B);

  return true;
}
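
The NumUnsafeUsesForTypeTest sweep at the end is a use-count pattern: each type test starts out charged with its unsafe uses, counts are decremented elsewhere in the pass as uses are devirtualized, and any test whose count reaches zero can be folded to true. A reduced sketch of the final sweep, with a hypothetical Check type standing in for the call instruction:

#include <unordered_map>

struct Check { bool FoldedToTrue = false; };

// Fold every check whose unsafe-use count has dropped to zero; in the
// real pass this is replaceAllUsesWith(true) plus eraseFromParent().
void foldSafeChecks(std::unordered_map<Check *, unsigned> &UnsafeUses) {
  for (auto &KV : UnsafeUses)
    if (KV.second == 0)
      KV.first->FoldedToTrue = true;
}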
Example #9
  bool isEmpty() const { return Roots.empty(); }
Example #10
bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  // TODO: Could probably handle variadic functions.
  if (F.isVarArg() || F.hasStructRetAttr() ||
      AMDGPU::isEntryFunctionCC(F.getCallingConv()))
    return false;

  MDA = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();

  unsigned ReturnNumRegs = 0;
  SmallSet<int, 4> OutArgIndexes;
  SmallVector<Type *, 4> ReturnTypes;
  Type *RetTy = F.getReturnType();
  if (!RetTy->isVoidTy()) {
    ReturnNumRegs = DL->getTypeStoreSize(RetTy) / 4;

    if (ReturnNumRegs >= MaxNumRetRegs)
      return false;

    ReturnTypes.push_back(RetTy);
  }

  SmallVector<Argument *, 4> OutArgs;
  for (Argument &Arg : F.args()) {
    if (isOutArgumentCandidate(Arg)) {
      LLVM_DEBUG(dbgs() << "Found possible out argument " << Arg
                        << " in function " << F.getName() << '\n');
      OutArgs.push_back(&Arg);
    }
  }

  if (OutArgs.empty())
    return false;

  using ReplacementVec = SmallVector<std::pair<Argument *, Value *>, 4>;

  DenseMap<ReturnInst *, ReplacementVec> Replacements;

  SmallVector<ReturnInst *, 4> Returns;
  for (BasicBlock &BB : F) {
    if (ReturnInst *RI = dyn_cast<ReturnInst>(&BB.back()))
      Returns.push_back(RI);
  }

  if (Returns.empty())
    return false;

  bool Changing;

  do {
    Changing = false;

    // Keep retrying if we are able to successfully eliminate an argument. This
    // helps with cases where multiple arguments may alias, such as in a
    // sincos implementation. If we have 2 stores to arguments, on the first
    // attempt the MDA query will succeed for the second store but not the
    // first. On the second iteration we've removed that clobbering out
    // argument (by effectively moving it into another function) and will find
    // the second argument is OK to move.
    for (Argument *OutArg : OutArgs) {
      bool ThisReplaceable = true;
      SmallVector<std::pair<ReturnInst *, StoreInst *>, 4> ReplaceableStores;

      Type *ArgTy = OutArg->getType()->getPointerElementType();

      // Skip this argument if converting it will push us over the register
      // count to return limit.

      // TODO: This is an approximation. When legalized this could be more. We
      // can ask TLI for exactly how many.
      unsigned ArgNumRegs = DL->getTypeStoreSize(ArgTy) / 4;
      if (ArgNumRegs + ReturnNumRegs > MaxNumRetRegs)
        continue;

      // An argument is convertible only if all exit blocks are able to replace
      // it.
      for (ReturnInst *RI : Returns) {
        BasicBlock *BB = RI->getParent();

        MemDepResult Q = MDA->getPointerDependencyFrom(MemoryLocation(OutArg),
                                                       true, BB->end(), BB, RI);
        StoreInst *SI = nullptr;
        if (Q.isDef())
          SI = dyn_cast<StoreInst>(Q.getInst());

        if (SI) {
          LLVM_DEBUG(dbgs() << "Found out argument store: " << *SI << '\n');
          ReplaceableStores.emplace_back(RI, SI);
        } else {
          ThisReplaceable = false;
          break;
        }
      }

      if (!ThisReplaceable)
        continue; // Try the next argument candidate.

      for (std::pair<ReturnInst *, StoreInst *> Store : ReplaceableStores) {
        Value *ReplVal = Store.second->getValueOperand();

        auto &ValVec = Replacements[Store.first];
        if (llvm::find_if(ValVec,
              [OutArg](const std::pair<Argument *, Value *> &Entry) {
                 return Entry.first == OutArg;}) != ValVec.end()) {
          LLVM_DEBUG(dbgs()
                     << "Saw multiple out arg stores " << *OutArg << '\n');
          // It is possible to see stores to the same argument multiple times,
          // but we expect these would have been optimized out already.
          ThisReplaceable = false;
          break;
        }

        ValVec.emplace_back(OutArg, ReplVal);
        Store.second->eraseFromParent();
      }

      if (ThisReplaceable) {
        ReturnTypes.push_back(ArgTy);
        OutArgIndexes.insert(OutArg->getArgNo());
        ++NumOutArgumentsReplaced;
        Changing = true;
      }
    }
  } while (Changing);

  if (Replacements.empty())
    return false;

  LLVMContext &Ctx = F.getParent()->getContext();
  StructType *NewRetTy = StructType::create(Ctx, ReturnTypes, F.getName());

  FunctionType *NewFuncTy = FunctionType::get(NewRetTy,
                                              F.getFunctionType()->params(),
                                              F.isVarArg());

  LLVM_DEBUG(dbgs() << "Computed new return type: " << *NewRetTy << '\n');

  Function *NewFunc = Function::Create(NewFuncTy, Function::PrivateLinkage,
                                       F.getName() + ".body");
  F.getParent()->getFunctionList().insert(F.getIterator(), NewFunc);
  NewFunc->copyAttributesFrom(&F);
  NewFunc->setComdat(F.getComdat());

  // We want to preserve the function and param attributes, but need to strip
  // off any return attributes, e.g. zeroext doesn't make sense with a struct.
  NewFunc->stealArgumentListFrom(F);

  AttrBuilder RetAttrs;
  RetAttrs.addAttribute(Attribute::SExt);
  RetAttrs.addAttribute(Attribute::ZExt);
  RetAttrs.addAttribute(Attribute::NoAlias);
  NewFunc->removeAttributes(AttributeList::ReturnIndex, RetAttrs);
  // TODO: How to preserve metadata?

  // Move the body of the function into the new rewritten function, and replace
  // this function with a stub.
  NewFunc->getBasicBlockList().splice(NewFunc->begin(), F.getBasicBlockList());

  for (std::pair<ReturnInst *, ReplacementVec> &Replacement : Replacements) {
    ReturnInst *RI = Replacement.first;
    IRBuilder<> B(RI);
    B.SetCurrentDebugLocation(RI->getDebugLoc());

    int RetIdx = 0;
    Value *NewRetVal = UndefValue::get(NewRetTy);

    Value *RetVal = RI->getReturnValue();
    if (RetVal)
      NewRetVal = B.CreateInsertValue(NewRetVal, RetVal, RetIdx++);

    for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second) {
      Argument *Arg = ReturnPoint.first;
      Value *Val = ReturnPoint.second;
      Type *EltTy = Arg->getType()->getPointerElementType();
      if (Val->getType() != EltTy) {
        Type *EffectiveEltTy = EltTy;
        if (StructType *CT = dyn_cast<StructType>(EltTy)) {
          assert(CT->getNumElements() == 1);
          EffectiveEltTy = CT->getElementType(0);
        }

        if (DL->getTypeSizeInBits(EffectiveEltTy) !=
            DL->getTypeSizeInBits(Val->getType())) {
          assert(isVec3ToVec4Shuffle(EffectiveEltTy, Val->getType()));
          Val = B.CreateShuffleVector(Val, UndefValue::get(Val->getType()),
                                      { 0, 1, 2 });
        }

        Val = B.CreateBitCast(Val, EffectiveEltTy);

        // Re-create single element composite.
        if (EltTy != EffectiveEltTy)
          Val = B.CreateInsertValue(UndefValue::get(EltTy), Val, 0);
      }

      NewRetVal = B.CreateInsertValue(NewRetVal, Val, RetIdx++);
    }

    if (RetVal)
      RI->setOperand(0, NewRetVal);
    else {
      B.CreateRet(NewRetVal);
      RI->eraseFromParent();
    }
  }

  SmallVector<Value *, 16> StubCallArgs;
  for (Argument &Arg : F.args()) {
    if (OutArgIndexes.count(Arg.getArgNo())) {
      // It's easier to preserve the type of the argument list. We rely on
      // DeadArgumentElimination to take care of these.
      StubCallArgs.push_back(UndefValue::get(Arg.getType()));
    } else {
      StubCallArgs.push_back(&Arg);
    }
  }

  BasicBlock *StubBB = BasicBlock::Create(Ctx, "", &F);
  IRBuilder<> B(StubBB);
  CallInst *StubCall = B.CreateCall(NewFunc, StubCallArgs);

  int RetIdx = RetTy->isVoidTy() ? 0 : 1;
  for (Argument &Arg : F.args()) {
    if (!OutArgIndexes.count(Arg.getArgNo()))
      continue;

    PointerType *ArgType = cast<PointerType>(Arg.getType());

    auto *EltTy = ArgType->getElementType();
    unsigned Align = Arg.getParamAlignment();
    if (Align == 0)
      Align = DL->getABITypeAlignment(EltTy);

    Value *Val = B.CreateExtractValue(StubCall, RetIdx++);
    Type *PtrTy = Val->getType()->getPointerTo(ArgType->getAddressSpace());

    // We can peek through bitcasts, so the type may not match.
    Value *PtrVal = B.CreateBitCast(&Arg, PtrTy);

    B.CreateAlignedStore(Val, PtrVal, Align);
  }

  if (!RetTy->isVoidTy()) {
    B.CreateRet(B.CreateExtractValue(StubCall, 0));
  } else {
    B.CreateRetVoid();
  }

  // The function is now a stub we want to inline.
  F.addFnAttr(Attribute::AlwaysInline);

  ++NumOutArgumentFunctionsReplaced;
  return true;
}
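
The do/while in the middle of this pass is a fixed-point loop: eliminating one out argument can remove the clobbering store that blocked another, so the candidate scan repeats until a full pass changes nothing. The generic shape, sketched with a hypothetical callback:

#include <vector>

// Re-run tryEliminate over all candidates until one complete pass makes
// no progress; earlier eliminations can unblock later candidates.
template <typename T, typename TryFn>
void runToFixedPoint(std::vector<T> &Candidates, TryFn tryEliminate) {
  bool Changing;
  do {
    Changing = false;
    for (T &C : Candidates)
      if (tryEliminate(C)) // returns true if it changed anything
        Changing = true;
  } while (Changing);
}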
Example #11
bool DevirtModule::run() {
  Function *TypeTestFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::type_test));
  Function *TypeCheckedLoadFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
  Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));

  if ((!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
       AssumeFunc->use_empty()) &&
      (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
    return false;

  if (TypeTestFunc && AssumeFunc)
    scanTypeTestUsers(TypeTestFunc, AssumeFunc);

  if (TypeCheckedLoadFunc)
    scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);

  // Rebuild type metadata into a map for easy lookup.
  std::vector<VTableBits> Bits;
  DenseMap<Metadata *, std::set<TypeMemberInfo>> TypeIdMap;
  buildTypeIdentifierMap(Bits, TypeIdMap);
  if (TypeIdMap.empty())
    return true;

  // For each (type, offset) pair:
  bool DidVirtualConstProp = false;
  for (auto &S : CallSlots) {
    // Search each of the members of the type identifier for the virtual
    // function implementation at offset S.first.ByteOffset, and add to
    // TargetsForSlot.
    std::vector<VirtualCallTarget> TargetsForSlot;
    if (!tryFindVirtualCallTargets(TargetsForSlot, TypeIdMap[S.first.TypeID],
                                   S.first.ByteOffset))
      continue;

    if (trySingleImplDevirt(TargetsForSlot, S.second))
      continue;

    DidVirtualConstProp |= tryVirtualConstProp(TargetsForSlot, S.second);
  }

  // If we were able to eliminate all unsafe uses for a type checked load,
  // eliminate the type test by replacing it with true.
  if (TypeCheckedLoadFunc) {
    auto True = ConstantInt::getTrue(M.getContext());
    for (auto &&U : NumUnsafeUsesForTypeTest) {
      if (U.second == 0) {
        U.first->replaceAllUsesWith(True);
        U.first->eraseFromParent();
      }
    }
  }

  // Rebuild each global we touched as part of virtual constant propagation to
  // include the before and after bytes.
  if (DidVirtualConstProp)
    for (VTableBits &B : Bits)
      rebuildGlobal(B);

  return true;
}