Exemple #1
0
bool HexagonRemoveExtendArgs::runOnFunction(Function &F) {
  unsigned Idx = 1;
  for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE;
       ++AI, ++Idx) {
    if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) {
      Argument* Arg = AI;
      if (!isa<PointerType>(Arg->getType())) {
        for (Instruction::use_iterator UI = Arg->use_begin();
             UI != Arg->use_end();) {
          if (isa<SExtInst>(*UI)) {
            Instruction* Use = cast<Instruction>(*UI);
            SExtInst* SI = new SExtInst(Arg, Use->getType());
            assert (EVT::getEVT(SI->getType()) ==
                    (EVT::getEVT(Use->getType())));
            ++UI;
            Use->replaceAllUsesWith(SI);
            Instruction* First = F.getEntryBlock().begin();
            SI->insertBefore(First);
            Use->eraseFromParent();
          } else {
            ++UI;
          }
        }
      }
    }
  }
  return true;
}
SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
  // no interprocedural analysis is done at the moment
  if (!A.hasByValAttr()) {
    ++ObjectVisitorArgument;
    return unknown();
  }
  PointerType *PT = cast<PointerType>(A.getType());
  APInt Size(IntTyBits, TD->getTypeAllocSize(PT->getElementType()));
  return std::make_pair(align(Size, A.getParamAlignment()), Zero);
}
ArgumentArea::ArgumentArea(QWidget *parent, Argument arg)
	: KHBox(parent)
{
	prefix = arg.getPrefix();
	
	QLabel *label = new QLabel(arg.getName(), this);
	label->setToolTip(arg.getDescription());
	
	if(arg.getOptional() == true){
		QCheckBox *enabled = new QCheckBox("", this);
		enabled->setCheckState(Qt::Unchecked);
		if(arg.getType() == Argument::Switch){
			connect(enabled, SIGNAL(stateChanged(int)), this, SLOT(switchUpdate(int)));
		}
bool AMDGPURewriteOutArguments::isOutArgumentCandidate(Argument &Arg) const {
  const unsigned MaxOutArgSizeBytes = 4 * MaxNumRetRegs;
  PointerType *ArgTy = dyn_cast<PointerType>(Arg.getType());

  // TODO: It might be useful for any out arguments, not just privates.
  if (!ArgTy || (ArgTy->getAddressSpace() != DL->getAllocaAddrSpace() &&
                 !AnyAddressSpace) ||
      Arg.hasByValAttr() || Arg.hasStructRetAttr() ||
      DL->getTypeStoreSize(ArgTy->getPointerElementType()) > MaxOutArgSizeBytes) {
    return false;
  }

  return checkArgumentUses(Arg);
}
Exemple #5
0
vector<const TargetRegisterInfo*> ipaFindUsedReturns(ParameterRegistry& registry, Function& function, const vector<const TargetRegisterInfo*>& returns)
{
	// Excuse entry points from not having callers; use every return.
	if (function.use_empty())
	if (auto address = md::getVirtualAddress(function))
	if (isEntryPoint(address->getLimitedValue()))
	{
		return returns;
	}
	
	// Otherwise, loop through callers and see which registers are used after the function call.
	TargetInfo& targetInfo = registry.getTargetInfo();
	SmallPtrSet<MemoryPhi*, 4> visited;
	vector<const TargetRegisterInfo*> result;
	for (auto& use : function.uses())
	{
		if (auto call = dyn_cast<CallInst>(use.getUser()))
		{
			auto parentFunction = call->getParent()->getParent();
			if (parentFunction == &function)
			{
				// TODO: This isn't impossible to compute, just somewhat inconvenient.
				continue;
			}
			
			Argument* parentArgs = parentFunction->arg_begin();
			auto pointerType = dyn_cast<PointerType>(parentArgs->getType());
			assert(pointerType != nullptr && pointerType->getTypeAtIndex(int(0))->getStructName() == "struct.x86_regs");
			
			visited.clear();
			MemorySSA& mssa = *registry.getMemorySSA(*parentFunction);
			findUsedReturns(returns, targetInfo, mssa, visited, *mssa.getMemoryAccess(call), result);
		}
	}
	return result;
}
Exemple #6
0
bool CallingConvention_x86_64_systemv::analyzeFunction(ParameterRegistry &registry, CallInformation &callInfo, Function &function)
{
	// TODO: Look at called functions to find hidden parameters/return values
	
	if (md::isPrototype(function))
	{
		return false;
	}
	
	TargetInfo& targetInfo = registry.getTargetInfo();
	
	// We always need rip and rsp.
	callInfo.addParameter(ValueInformation::IntegerRegister, targetInfo.registerNamed("rip"));
	callInfo.addParameter(ValueInformation::IntegerRegister, targetInfo.registerNamed("rsp"));
	
	// Identify register GEPs.
	// (assume x86 regs as first parameter)
	assert(function.arg_size() == 1);
	Argument* regs = function.arg_begin();
	auto pointerType = dyn_cast<PointerType>(regs->getType());
	assert(pointerType != nullptr && pointerType->getTypeAtIndex(int(0))->getStructName() == "struct.x86_regs");
	
	unordered_multimap<const TargetRegisterInfo*, GetElementPtrInst*> geps;
	for (auto& use : regs->uses())
	{
		if (GetElementPtrInst* gep = dyn_cast<GetElementPtrInst>(use.getUser()))
		if (const TargetRegisterInfo* regName = targetInfo.registerInfo(*gep))
		{
			geps.insert({regName, gep});
		}
	}
	
	// Look at temporary registers that are read before they are written
	MemorySSA& mssa = *registry.getMemorySSA(function);
	for (const char* name : parameterRegisters)
	{
		const TargetRegisterInfo* smallReg = targetInfo.registerNamed(name);
		const TargetRegisterInfo* regInfo = targetInfo.largestOverlappingRegister(*smallReg);
		auto range = geps.equal_range(regInfo);
		
		vector<Instruction*> addresses;
		for (auto iter = range.first; iter != range.second; ++iter)
		{
			addresses.push_back(iter->second);
		}
		
		for (size_t i = 0; i < addresses.size(); ++i)
		{
			Instruction* addressInst = addresses[i];
			for (auto& use : addressInst->uses())
			{
				if (auto load = dyn_cast<LoadInst>(use.getUser()))
				{
					MemoryAccess* parent = mssa.getMemoryAccess(load)->getDefiningAccess();
					if (mssa.isLiveOnEntryDef(parent))
					{
						// register argument!
						callInfo.addParameter(ValueInformation::IntegerRegister, regInfo);
					}
				}
				else if (auto cast = dyn_cast<CastInst>(use.getUser()))
				{
					if (cast->getType()->isPointerTy())
					{
						addresses.push_back(cast);
					}
				}
			}
		}
	}
	
	// Does the function refer to values at an offset above the initial rsp value?
	// Assume that rsp is known to be preserved.
	auto spRange = geps.equal_range(targetInfo.getStackPointer());
	for (auto iter = spRange.first; iter != spRange.second; ++iter)
	{
		auto* gep = iter->second;
		// Find all uses of reference to sp register
		for (auto& use : gep->uses())
		{
			if (auto load = dyn_cast<LoadInst>(use.getUser()))
			{
				// Find uses above +8 (since +0 is the return address)
				for (auto& use : load->uses())
				{
					ConstantInt* offset = nullptr;
					if (match(use.get(), m_Add(m_Value(), m_ConstantInt(offset))))
					{
						make_signed<decltype(offset->getLimitedValue())>::type intOffset = offset->getLimitedValue();
						if (intOffset > 8)
						{
							// memory argument!
							callInfo.addParameter(ValueInformation::Stack, intOffset);
						}
					}
				}
			}
		}
	}
	
	// Are we using return registers?
	vector<const TargetRegisterInfo*> usedReturns;
	usedReturns.reserve(2);
	
	for (const char* name : returnRegisters)
	{
		const TargetRegisterInfo* regInfo = targetInfo.registerNamed(name);
		auto range = geps.equal_range(regInfo);
		for (auto iter = range.first; iter != range.second; ++iter)
		{
			bool hasStore = any_of(iter->second->use_begin(), iter->second->use_end(), [](Use& use)
			{
				return isa<StoreInst>(use.getUser());
			});
			
			if (hasStore)
			{
				usedReturns.push_back(regInfo);
				break;
			}
		}
	}
	
	for (const TargetRegisterInfo* reg : ipaFindUsedReturns(registry, function, usedReturns))
	{
		// return value!
		callInfo.addReturn(ValueInformation::IntegerRegister, reg);
	}
	
	return true;
}
Exemple #7
0
void Lint::visitCallSite(CallSite CS) {
  Instruction &I = *CS.getInstruction();
  Value *Callee = CS.getCalledValue();

  visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr,
                       MemRef::Callee);

  if (Function *F = dyn_cast<Function>(findValue(Callee,
                                                 /*OffsetOk=*/false))) {
    Assert(CS.getCallingConv() == F->getCallingConv(),
           "Undefined behavior: Caller and callee calling convention differ",
           &I);

    FunctionType *FT = F->getFunctionType();
    unsigned NumActualArgs = CS.arg_size();

    Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
                          : FT->getNumParams() == NumActualArgs,
           "Undefined behavior: Call argument count mismatches callee "
           "argument count",
           &I);

    Assert(FT->getReturnType() == I.getType(),
           "Undefined behavior: Call return type mismatches "
           "callee return type",
           &I);

    // Check argument types (in case the callee was casted) and attributes.
    // TODO: Verify that caller and callee attributes are compatible.
    Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
    CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
    for (; AI != AE; ++AI) {
      Value *Actual = *AI;
      if (PI != PE) {
        Argument *Formal = &*PI++;
        Assert(Formal->getType() == Actual->getType(),
               "Undefined behavior: Call argument type mismatches "
               "callee parameter type",
               &I);

        // Check that noalias arguments don't alias other arguments. This is
        // not fully precise because we don't know the sizes of the dereferenced
        // memory regions.
        if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
            if (AI != BI && (*BI)->getType()->isPointerTy()) {
              AliasResult Result = AA->alias(*AI, *BI);
              Assert(Result != MustAlias && Result != PartialAlias,
                     "Unusual: noalias argument aliases another argument", &I);
            }

        // Check that an sret argument points to valid memory.
        if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
          Type *Ty =
            cast<PointerType>(Formal->getType())->getElementType();
          visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty),
                               DL->getABITypeAlignment(Ty), Ty,
                               MemRef::Read | MemRef::Write);
        }
      }
    }
  }

  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
    for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
         AI != AE; ++AI) {
      Value *Obj = findValue(*AI, /*OffsetOk=*/true);
      Assert(!isa<AllocaInst>(Obj),
             "Undefined behavior: Call with \"tail\" keyword references "
             "alloca",
             &I);
    }


  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
    switch (II->getIntrinsicID()) {
    default: break;

    // TODO: Check more intrinsics

    case Intrinsic::memcpy: {
      MemCpyInst *MCI = cast<MemCpyInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize,
                           MCI->getAlignment(), nullptr, MemRef::Write);
      visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize,
                           MCI->getAlignment(), nullptr, MemRef::Read);

      // Check that the memcpy arguments don't overlap. The AliasAnalysis API
      // isn't expressive enough for what we really want to do. Known partial
      // overlap is not distinguished from the case where nothing is known.
      uint64_t Size = 0;
      if (const ConstantInt *Len =
              dyn_cast<ConstantInt>(findValue(MCI->getLength(),
                                              /*OffsetOk=*/false)))
        if (Len->getValue().isIntN(32))
          Size = Len->getValue().getZExtValue();
      Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
                 MustAlias,
             "Undefined behavior: memcpy source and destination overlap", &I);
      break;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MMI = cast<MemMoveInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize,
                           MMI->getAlignment(), nullptr, MemRef::Write);
      visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize,
                           MMI->getAlignment(), nullptr, MemRef::Read);
      break;
    }
    case Intrinsic::memset: {
      MemSetInst *MSI = cast<MemSetInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize,
                           MSI->getAlignment(), nullptr, MemRef::Write);
      break;
    }

    case Intrinsic::vastart:
      Assert(I.getParent()->getParent()->isVarArg(),
             "Undefined behavior: va_start called in a non-varargs function",
             &I);

      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
                           nullptr, MemRef::Read | MemRef::Write);
      break;
    case Intrinsic::vacopy:
      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
                           nullptr, MemRef::Write);
      visitMemoryReference(I, CS.getArgument(1), MemoryLocation::UnknownSize, 0,
                           nullptr, MemRef::Read);
      break;
    case Intrinsic::vaend:
      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
                           nullptr, MemRef::Read | MemRef::Write);
      break;

    case Intrinsic::stackrestore:
      // Stackrestore doesn't read or write memory, but it sets the
      // stack pointer, which the compiler may read from or write to
      // at any time, so check it for both readability and writeability.
      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,
                           nullptr, MemRef::Read | MemRef::Write);
      break;
    }
}
Exemple #8
0
/// PromoteArguments - This method checks the specified function to see if there
/// are any promotable arguments and if it is safe to promote the function (for
/// example, all callers are direct).  If safe to promote some arguments, it
/// calls the DoPromotion method.
///
CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
  Function *F = CGN->getFunction();

  // Make sure that it is local to this module.
  if (!F || !F->hasLocalLinkage()) return 0;

  // First check: see if there are any pointer arguments!  If not, quick exit.
  SmallVector<std::pair<Argument*, unsigned>, 16> PointerArgs;
  unsigned ArgNo = 0;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
       I != E; ++I, ++ArgNo)
    if (I->getType()->isPointerTy())
      PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo));
  if (PointerArgs.empty()) return 0;

  // Second check: make sure that all callers are direct callers.  We can't
  // transform functions that have indirect callers.
  if (F->hasAddressTaken())
    return 0;

  // Check to see which arguments are promotable.  If an argument is promotable,
  // add it to ArgsToPromote.
  SmallPtrSet<Argument*, 8> ArgsToPromote;
  SmallPtrSet<Argument*, 8> ByValArgsToTransform;
  for (unsigned i = 0; i != PointerArgs.size(); ++i) {
    bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal);

    // If this is a byval argument, and if the aggregate type is small, just
    // pass the elements, which is always safe.
    Argument *PtrArg = PointerArgs[i].first;
    if (isByVal) {
      const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
      if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
        if (maxElements > 0 && STy->getNumElements() > maxElements) {
          DEBUG(dbgs() << "argpromotion disable promoting argument '"
                << PtrArg->getName() << "' because it would require adding more"
                << " than " << maxElements << " arguments to the function.\n");
        } else {
          // If all the elements are single-value types, we can promote it.
          bool AllSimple = true;
          for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
            if (!STy->getElementType(i)->isSingleValueType()) {
              AllSimple = false;
              break;
            }

          // Safe to transform, don't even bother trying to "promote" it.
          // Passing the elements as a scalar will allow scalarrepl to hack on
          // the new alloca we introduce.
          if (AllSimple) {
            ByValArgsToTransform.insert(PtrArg);
            continue;
          }
        }
      }
    }

    // Otherwise, see if we can promote the pointer to its value.
    if (isSafeToPromoteArgument(PtrArg, isByVal))
      ArgsToPromote.insert(PtrArg);
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) 
    return 0;

  return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
}
Exemple #9
0
/// PromoteArguments - This method checks the specified function to see if there
/// are any promotable arguments and if it is safe to promote the function (for
/// example, all callers are direct).  If safe to promote some arguments, it
/// calls the DoPromotion method.
///
CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
  Function *F = CGN->getFunction();

  // Make sure that it is local to this module.
  if (!F || !F->hasLocalLinkage()) return nullptr;

  // First check: see if there are any pointer arguments!  If not, quick exit.
  SmallVector<Argument*, 16> PointerArgs;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
    if (I->getType()->isPointerTy())
      PointerArgs.push_back(I);
  if (PointerArgs.empty()) return nullptr;

  // Second check: make sure that all callers are direct callers.  We can't
  // transform functions that have indirect callers.  Also see if the function
  // is self-recursive.
  bool isSelfRecursive = false;
  for (Use &U : F->uses()) {
    CallSite CS(U.getUser());
    // Must be a direct call.
    if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr;
    
    if (CS.getInstruction()->getParent()->getParent() == F)
      isSelfRecursive = true;
  }
  
  // Don't promote arguments for variadic functions. Adding, removing, or
  // changing non-pack parameters can change the classification of pack
  // parameters. Frontends encode that classification at the call site in the
  // IR, while in the callee the classification is determined dynamically based
  // on the number of registers consumed so far.
  if (F->isVarArg()) return nullptr;

  // Check to see which arguments are promotable.  If an argument is promotable,
  // add it to ArgsToPromote.
  SmallPtrSet<Argument*, 8> ArgsToPromote;
  SmallPtrSet<Argument*, 8> ByValArgsToTransform;
  for (unsigned i = 0, e = PointerArgs.size(); i != e; ++i) {
    Argument *PtrArg = PointerArgs[i];
    Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();

    // If this is a byval argument, and if the aggregate type is small, just
    // pass the elements, which is always safe, if the passed value is densely
    // packed or if we can prove the padding bytes are never accessed. This does
    // not apply to inalloca.
    bool isSafeToPromote =
      PtrArg->hasByValAttr() &&
      (isDenselyPacked(AgTy) || !canPaddingBeAccessed(PtrArg));
    if (isSafeToPromote) {
      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
        if (maxElements > 0 && STy->getNumElements() > maxElements) {
          DEBUG(dbgs() << "argpromotion disable promoting argument '"
                << PtrArg->getName() << "' because it would require adding more"
                << " than " << maxElements << " arguments to the function.\n");
          continue;
        }
        
        // If all the elements are single-value types, we can promote it.
        bool AllSimple = true;
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
          if (!STy->getElementType(i)->isSingleValueType()) {
            AllSimple = false;
            break;
          }
        }

        // Safe to transform, don't even bother trying to "promote" it.
        // Passing the elements as a scalar will allow scalarrepl to hack on
        // the new alloca we introduce.
        if (AllSimple) {
          ByValArgsToTransform.insert(PtrArg);
          continue;
        }
      }
    }

    // If the argument is a recursive type and we're in a recursive
    // function, we could end up infinitely peeling the function argument.
    if (isSelfRecursive) {
      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
        bool RecursiveType = false;
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
          if (STy->getElementType(i) == PtrArg->getType()) {
            RecursiveType = true;
            break;
          }
        }
        if (RecursiveType)
          continue;
      }
    }
    
    // Otherwise, see if we can promote the pointer to its value.
    if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr()))
      ArgsToPromote.insert(PtrArg);
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) 
    return nullptr;

  return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
}
/// PromoteArguments - This method checks the specified function to see if there
/// are any promotable arguments and if it is safe to promote the function (for
/// example, all callers are direct).  If safe to promote some arguments, it
/// calls the DoPromotion method.
///
CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
  Function *F = CGN->getFunction();

  // Make sure that it is local to this module.
  if (!F || !F->hasLocalLinkage()) return nullptr;

  // Don't promote arguments for variadic functions. Adding, removing, or
  // changing non-pack parameters can change the classification of pack
  // parameters. Frontends encode that classification at the call site in the
  // IR, while in the callee the classification is determined dynamically based
  // on the number of registers consumed so far.
  if (F->isVarArg()) return nullptr;

  // First check: see if there are any pointer arguments!  If not, quick exit.
  SmallVector<Argument*, 16> PointerArgs;
  for (Argument &I : F->args())
    if (I.getType()->isPointerTy())
      PointerArgs.push_back(&I);
  if (PointerArgs.empty()) return nullptr;

  // Second check: make sure that all callers are direct callers.  We can't
  // transform functions that have indirect callers.  Also see if the function
  // is self-recursive.
  bool isSelfRecursive = false;
  for (Use &U : F->uses()) {
    CallSite CS(U.getUser());
    // Must be a direct call.
    if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr;
    
    if (CS.getInstruction()->getParent()->getParent() == F)
      isSelfRecursive = true;
  }
  
  const DataLayout &DL = F->getParent()->getDataLayout();

  // We need to manually construct BasicAA directly in order to disable its use
  // of other function analyses.
  BasicAAResult BAR(createLegacyPMBasicAAResult(*this, *F));

  // Construct our own AA results for this function. We do this manually to
  // work around the limitations of the legacy pass manager.
  AAResults AAR(createLegacyPMAAResults(*this, *F, BAR));

  // Check to see which arguments are promotable.  If an argument is promotable,
  // add it to ArgsToPromote.
  SmallPtrSet<Argument*, 8> ArgsToPromote;
  SmallPtrSet<Argument*, 8> ByValArgsToTransform;
  for (unsigned i = 0, e = PointerArgs.size(); i != e; ++i) {
    Argument *PtrArg = PointerArgs[i];
    Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();

    // Replace sret attribute with noalias. This reduces register pressure by
    // avoiding a register copy.
    if (PtrArg->hasStructRetAttr()) {
      unsigned ArgNo = PtrArg->getArgNo();
      F->setAttributes(
          F->getAttributes()
              .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet)
              .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
      for (Use &U : F->uses()) {
        CallSite CS(U.getUser());
        CS.setAttributes(
            CS.getAttributes()
                .removeAttribute(F->getContext(), ArgNo + 1,
                                 Attribute::StructRet)
                .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
      }
    }

    // If this is a byval argument, and if the aggregate type is small, just
    // pass the elements, which is always safe, if the passed value is densely
    // packed or if we can prove the padding bytes are never accessed. This does
    // not apply to inalloca.
    bool isSafeToPromote =
        PtrArg->hasByValAttr() &&
        (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
    if (isSafeToPromote) {
      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
        if (maxElements > 0 && STy->getNumElements() > maxElements) {
          DEBUG(dbgs() << "argpromotion disable promoting argument '"
                << PtrArg->getName() << "' because it would require adding more"
                << " than " << maxElements << " arguments to the function.\n");
          continue;
        }
        
        // If all the elements are single-value types, we can promote it.
        bool AllSimple = true;
        for (const auto *EltTy : STy->elements()) {
          if (!EltTy->isSingleValueType()) {
            AllSimple = false;
            break;
          }
        }

        // Safe to transform, don't even bother trying to "promote" it.
        // Passing the elements as a scalar will allow sroa to hack on
        // the new alloca we introduce.
        if (AllSimple) {
          ByValArgsToTransform.insert(PtrArg);
          continue;
        }
      }
    }

    // If the argument is a recursive type and we're in a recursive
    // function, we could end up infinitely peeling the function argument.
    if (isSelfRecursive) {
      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
        bool RecursiveType = false;
        for (const auto *EltTy : STy->elements()) {
          if (EltTy == PtrArg->getType()) {
            RecursiveType = true;
            break;
          }
        }
        if (RecursiveType)
          continue;
      }
    }
    
    // Otherwise, see if we can promote the pointer to its value.
    if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR))
      ArgsToPromote.insert(PtrArg);
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) 
    return nullptr;

  return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
}
Exemple #11
0
void HeterotbbTransform::gen_opt_code_per_f (Function* NF, Function* F) {
    // Get the names of the parameters for old function
    Function::arg_iterator FI = F->arg_begin();
    Argument *classname = &*FI;
    FI++;
    Argument *numiters = &*FI;

    // Set the names of the parameters for new function
    Function::arg_iterator DestI = NF->arg_begin();
    DestI->setName(classname->getName());
    Argument *class_name = &(*DestI);
    //second argument
    DestI++;
    DestI->setName(numiters->getName());
    Argument *num_iters = &(*DestI);

#ifdef EXPLICIT_REWRITE
    DenseMap<const Value*, Value *> ValueMap;
#else
    ValueToValueMapTy ValueMap;
#endif

#if EXPLICIT_REWRITE
    //get the old basic block and create a new one
    Function::const_iterator BI = F->begin();
    const BasicBlock &FB = *BI;
    BasicBlock *NFBB = BasicBlock::Create(FB.getContext(), "", NF);
    if (FB.hasName()) {
        NFBB->setName(FB.getName());
        //DEBUG(dbgs()<<FB.getName()<<"\n");
    }
    ValueMap[&FB] = NFBB;

    ValueMap[numiters] = num_iters;
    //must create a new instruction which casts i32* back to the class name
    CastInst *StrucRevCast = CastInst::Create(Instruction::BitCast, class_name,
                             classname->getType(), classname->getName(), NFBB);
    ValueMap[classname] = StrucRevCast;


    for (BasicBlock::const_iterator II = FB.begin(), IE = FB.end(); II != IE; ++II) {
        Instruction *NFInst = II->clone(/*F->getContext()*/);
        //	DEBUG(dbgs()<<*II<<"\n");
        if (II->hasName()) NFInst->setName(II->getName());
        const Instruction *FInst = &(*II);
        rewrite_instruction((Instruction *)FInst, NFInst, ValueMap);
        NFBB->getInstList().push_back(NFInst);
        ValueMap[II] = NFInst;
    }
    BI++;

    for (Function::const_iterator /*BI=F->begin(),*/BE = F->end(); BI != BE; ++BI) {
        const BasicBlock &FBB = *BI;
        BasicBlock *NFBB = BasicBlock::Create(FBB.getContext(), "", NF);
        ValueMap[&FBB] = NFBB;
        if (FBB.hasName()) {
            NFBB->setName(FBB.getName());
            //DEBUG(dbgs()<<NFBB->getName()<<"\n");
        }
        for (BasicBlock::const_iterator II = FBB.begin(), IE = FBB.end(); II != IE; ++II) {
            Instruction *NFInst = II->clone(/*F->getContext()*/);
            if (II->hasName()) NFInst->setName(II->getName());
            const Instruction *FInst = &(*II);
            rewrite_instruction((Instruction *)FInst, NFInst, ValueMap);
            NFBB->getInstList().push_back(NFInst);
            ValueMap[II] = NFInst;
        }
    }
    // Remap the instructions again to take care of forward jumps
    for (Function::iterator BB = NF->begin(), BE=NF->end(); BB != BE; ++ BB) {
        for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II) {
            int opIdx = 0;
            //DEBUG(dbgs()<<*II<<"\n");
            for (User::op_iterator i = II->op_begin(), e = II->op_end(); i != e; ++i, opIdx++) {
                Value *V = *i;
                if (ValueMap[V] != NULL) {
                    II->setOperand(opIdx, ValueMap[V]);
                }
            }
        }
    }
#else
    Function::const_iterator BI = F->begin();
    const BasicBlock &FB = *BI;
    BasicBlock *NFBB = BasicBlock::Create(FB.getContext(), "", NF);
    if (FB.hasName()) {
        NFBB->setName(FB.getName());
    }
    ValueMap[&FB] = NFBB;
    CastInst *StrucRevCast = CastInst::Create(Instruction::BitCast, class_name,
                             classname->getType(), classname->getName(), NFBB);
    ValueMap[classname] = StrucRevCast;
    ValueMap[numiters] = num_iters;
    CloneFunctionWithExistingBBInto(NF, NFBB, F, ValueMap, "");
#endif
}
Exemple #12
0
        bool runOnModule(Module &M) override
        {
            // LOAD: look at each generated function, each a load is followed by writing
            // to a pointer argument with attribute denoting it to be write channel "CHANNELWR"
            // we change the load:
            // 0. replace the original CHANNELWR channel with an address port and a size port (optional)
            // 1. in the absence of burst, replace the load instruction with an address write to
            // the address port
            // 2. in the presence of burst, move load outside of the involved loop and make one
            // address write + one size write
            // 3. add in new function to read memory and write to fifo....the same fifo the downstream
            // guys are reading -- this newly added function would break them into reasonable bursts
            // STORE: let's not do this first
            // 0. replace the original store with an address port and a size port(optional) and a data port
            // 1. in the case of burst, address req get moved outside but actual data is written into the
            // data port as they get created
            // newly created memory access function
            errs()<<"into func run\n";
            std::vector<Function*> memoryAccessFunctions;
            // top level functions layout pipeline accessed at the end
            std::vector<Function*> pipelineLevelFunctions;
            std::vector<Function*> topLevelFunctions;
            for(auto funcIter = M.begin(); funcIter!=M.end(); funcIter++)
            {
                Function& curFunc = *funcIter;
                if(!curFunc.hasFnAttribute(GENERATEDATTR))
                {
                    if(curFunc.hasFnAttribute(TRANSFORMEDATTR))
                        topLevelFunctions.push_back(funcIter);
                    continue;
                }
                LoopInfo* funcLI=&getAnalysis<LoopInfo>(curFunc);
                // iterate through the basicblocks and see if the loaded value
                // is written to a channel out put -- we do not convert stores
                // the argument involved here are all old arguments
                std::map<Instruction*, Argument*> load2Port;
                std::set<Argument*> addressArg;
                std::set<Argument*> burstedArg;
                std::map<Instruction*, Argument*> store2Port;
                for(auto bbIter = curFunc.begin(); bbIter!= curFunc.end(); bbIter++)
                {
                    BasicBlock& curBB = *bbIter;
                    for(auto insIter = curBB.begin(); insIter!=curBB.end(); insIter++)
                    {
                        Instruction& curIns = *insIter;
                        if(isa<LoadInst>(curIns))
                        {
                            LoadInst& li = cast<LoadInst>(curIns);
                            // we check if the result of this is directly written to an output port
                            // using store
                            int numUser = std::distance(curIns.user_begin(),curIns.user_end());
                            if(numUser==1 )
                            {
                                auto soleUserIter = curIns.user_begin();
                                if(isa<StoreInst>(*soleUserIter))
                                {
                                    StoreInst* si = cast<StoreInst>(*soleUserIter);
                                    Value* written2 = si->getPointerOperand();
                                    if(isa<Argument>(*written2))
                                    {
                                        Argument& channelArg = cast<Argument>(*written2);
                                        // make sure this is wrchannel
                                        if(isArgChannel(&channelArg))
                                        {
                                            load2Port[&li] = &channelArg;
                                            addressArg.insert(&channelArg);
                                            if(burstAccess&& analyzeLoadBurstable(&li,funcLI))
                                                burstedArg.insert(&channelArg);

                                        }
                                    }
                                }
                            }
                        }
                        //FIXME: not doing storeInst
                        else if(isa<StoreInst>(curIns))
                        {

                        }
                    }
                }
                // now we have the loadInst which will be converted to pipelined mem access
                // we need to create a bunch of new functions -- we then use these new functions
                // in our new top levels --- after which everything original is deleted
                std::string functionName = curFunc.getName();
                functionName += "MemTrans";
                Type* rtType = curFunc.getReturnType();
                // old to new argument map
                std::map<Argument*,Argument*> oldDataFifoArg2newAddrArg;
                // these are arguments of the newly created function
                std::map<Argument*,Argument*> addressArg2SizeArg;
                std::vector<Type*> paramsType;

                for(auto argIter = curFunc.arg_begin();argIter!=curFunc.arg_end();argIter++)
                {
                    Argument* curArg = &cast<Argument>(*argIter);
                    paramsType.push_back(curArg->getType());
                }
                for(int numBurstSize = 0; numBurstSize<burstedArg.size();numBurstSize++)
                {
                    paramsType.push_back(PointerType::get(Type::getInt32Ty(M.getContext()),0));
                }
                FunctionType* newFuncType = FunctionType::get(rtType,ArrayRef<Type*>(paramsType),false);
                Constant* newFunc = M.getOrInsertFunction(functionName, newFuncType  );
                Function* memTransFunc = cast<Function>(newFunc);

                auto  newArgIter = memTransFunc->arg_begin();
                for(auto oldArgIter = curFunc.arg_begin();
                    oldArgIter!=curFunc.arg_end();
                    oldArgIter++, newArgIter++)
                {
                    Argument* oldArg = &cast<Argument>(*oldArgIter);
                    Argument* newArg = &cast<Argument>(*newArgIter);
                    oldDataFifoArg2newAddrArg[oldArg] = newArg;
                }
                auto burstedArgIter = burstedArg.begin();
                while(newArgIter!=memTransFunc->arg_end())
                {
                    Argument* newBurstArg = &cast<Argument>(*newArgIter);
                    Argument* originalDataFifoArg = *burstedArgIter;
                    Argument* newAddressArg = oldDataFifoArg2newAddrArg[originalDataFifoArg];
                    addressArg2SizeArg[newAddressArg] = newBurstArg;
                    newArgIter++;
                    burstedArgIter++;
                }

                // if bursted access is in a loop, we want to take it out of the loop
                // make it pre-header -- to do this, we associate each loadIns with
                std::map<BasicBlock*,std::vector<Instruction*>*> bb2BurstedLoads;
                for(auto load2PortIter = load2Port.begin(); load2PortIter!=load2Port.end(); load2PortIter++)
                {
                    Instruction* ldInst = load2PortIter->first;
                    Argument* ldArg = load2PortIter->second;
                    BasicBlock* ldParent = ldInst->getParent();
                    if(burstedArg.count(ldArg))
                    {
                        // this load is to be bursted
                        if(!bb2BurstedLoads.count(ldParent))
                            bb2BurstedLoads[ldParent] = new std::vector<Instruction*>();
                        bb2BurstedLoads[ldParent]->push_back(ldInst);
                    }
                }
                // now memTransFunc is the new function
                // we will now populate it, we mirror everybb
                std::map<BasicBlock*,BasicBlock*> oldBB2NewBB;
                // also we need a few preheaders to do the burst load
                for(auto bbIter = curFunc.begin(); bbIter!= curFunc.end(); bbIter++)
                {
                    BasicBlock& oldBB = *bbIter;


                }

                // FIXME: release bb2BurstedLoads

            }

            return false;

        }
Exemple #13
0
// Initialise symbolic values concerning the arguments to the specialisation root function.
// Pesimistically these could point to anything, but we might be able to show they can't alias one another
// (a la C99 restrict pointers).
void LLPEAnalysisPass::createPointerArguments(InlineAttempt* IA) {

  // Try to establish if any incoming pointer arguments are known not to alias
  // the globals, or each other. If so, allocate each a heap slot.

  std::vector<std::vector<Value*> > argAllocSites;
  
  Function::arg_iterator AI = IA->F.arg_begin(), AE = IA->F.arg_end();
  for(uint32_t i = 0; AI != AE; ++i, ++AI) {

    argAllocSites.push_back(std::vector<Value*>());

    Argument* A = AI;
    if(A->getType()->isPointerTy()) {

      ImprovedValSetSingle* IVS = cast<ImprovedValSetSingle>(IA->argShadows[i].i.PB);
      if(IVS->SetType == ValSetTypeOldOverdef) {

	std::vector<Value*>& allocs = argAllocSites.back();

	if(forceNoAliasArgs.count(i)) {

	  // Not an allocation site, but doesn't matter for this purpose:
	  // This will force us to conclude the argument doesn't alias globals
	  // or any other arguments.
	  allocs.push_back(A);

	}
	else {

	  // This will leave argAllocSites empty on failure:
	  getAllocSites(A, allocs);

	}

	if(!allocs.empty()) {

	  IVS->SetType = ValSetTypePB;

	  // Create a new heap location for this argument if it has any non-global constituents.
	  // Just note any globals in the alias list.
	  bool anyNonGlobals = false;
	  for(std::vector<Value*>::iterator it = allocs.begin(), itend = allocs.end(); it != itend; ++it) {
	    
	    if(GlobalVariable* GV = dyn_cast<GlobalVariable>(*it)) {
	      
	      ShadowGV* SGV = &shadowGlobals[getShadowGlobalIndex(GV)];
	      IVS->Values.push_back(ImprovedVal(ShadowValue(SGV), 0));

	    }
	    else if(!anyNonGlobals) {

	      // Create location:
	      argStores[i] = ArgStore(heap.size());
	      heap.push_back(AllocData());
	      heap.back().allocIdx = heap.size() - 1;
	      heap.back().isCommitted = false;
	      heap.back().allocValue = ShadowValue(&IA->argShadows[i]);
	      heap.back().allocType = IA->argShadows[i].getType();
	      anyNonGlobals = true;

	    }

	  }

	}

      }

    }
    
  }

  // Now for each argument for which we found a bounded set of alloc sites,
  // give it an initial pointer set corresponding to each other arg it may alias.

  for(uint32_t i = 0, ilim = IA->F.arg_size(); i != ilim; ++i) {

    ImprovedValSetSingle* IVS = cast<ImprovedValSetSingle>(IA->argShadows[i].i.PB);
    std::vector<Value*>& allocs = argAllocSites[i];
    if(!allocs.empty()) {

      // Add each pointer argument location this /may/ alias:
      for(uint32_t j = 0, jlim = IA->F.arg_size(); j != jlim; ++j) {
	
	if(!argAllocSites[j].empty()) {
	  
	  std::vector<Value*>& otherallocs = argAllocSites[j];
	  for(std::vector<Value*>::iterator it = otherallocs.begin(), itend = otherallocs.end(); it != itend; ++it) {
	    
	    if(isa<GlobalVariable>(*it))
	      continue;

	    if(std::find(allocs.begin(), allocs.end(), *it) != allocs.end()) {

	      // i and j share a non-global allocation site, so arg i may alias arg j.
	      
	      IVS->Values.push_back(ImprovedVal(ShadowValue(&IA->argShadows[j]), 0));
	      break;

	    }
    
	  }

	}

      }

    }

  }

}
/// PromoteArguments - This method checks the specified function to see if there
/// are any promotable arguments and if it is safe to promote the function (for
/// example, all callers are direct).  If safe to promote some arguments, it
/// calls the DoPromotion method.
///
CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
  Function *F = CGN->getFunction();

  // Make sure that it is local to this module.
  if (!F || !F->hasLocalLinkage()) return 0;

  // First check: see if there are any pointer arguments!  If not, quick exit.
  SmallVector<Argument*, 16> PointerArgs;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
    if (I->getType()->isPointerTy())
      PointerArgs.push_back(I);
  if (PointerArgs.empty()) return 0;

  // Second check: make sure that all callers are direct callers.  We can't
  // transform functions that have indirect callers.  Also see if the function
  // is self-recursive.
  bool isSelfRecursive = false;
  for (Value::use_iterator UI = F->use_begin(), E = F->use_end();
       UI != E; ++UI) {
    CallSite CS(*UI);
    // Must be a direct call.
    if (CS.getInstruction() == 0 || !CS.isCallee(UI)) return 0;
    
    if (CS.getInstruction()->getParent()->getParent() == F)
      isSelfRecursive = true;
  }
  
  // Check to see which arguments are promotable.  If an argument is promotable,
  // add it to ArgsToPromote.
  SmallPtrSet<Argument*, 8> ArgsToPromote;
  SmallPtrSet<Argument*, 8> ByValArgsToTransform;
  for (unsigned i = 0, e = PointerArgs.size(); i != e; ++i) {
    Argument *PtrArg = PointerArgs[i];
    Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();

    // If this is a byval argument, and if the aggregate type is small, just
    // pass the elements, which is always safe.
    if (PtrArg->hasByValAttr()) {
      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
        if (maxElements > 0 && STy->getNumElements() > maxElements) {
          DEBUG(dbgs() << "argpromotion disable promoting argument '"
                << PtrArg->getName() << "' because it would require adding more"
                << " than " << maxElements << " arguments to the function.\n");
          continue;
        }
        
        // If all the elements are single-value types, we can promote it.
        bool AllSimple = true;
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
          if (!STy->getElementType(i)->isSingleValueType()) {
            AllSimple = false;
            break;
          }
        }

        // Safe to transform, don't even bother trying to "promote" it.
        // Passing the elements as a scalar will allow scalarrepl to hack on
        // the new alloca we introduce.
        if (AllSimple) {
          ByValArgsToTransform.insert(PtrArg);
          continue;
        }
      }
    }

    // If the argument is a recursive type and we're in a recursive
    // function, we could end up infinitely peeling the function argument.
    if (isSelfRecursive) {
      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
        bool RecursiveType = false;
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
          if (STy->getElementType(i) == PtrArg->getType()) {
            RecursiveType = true;
            break;
          }
        }
        if (RecursiveType)
          continue;
      }
    }
    
    // Otherwise, see if we can promote the pointer to its value.
    if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValAttr()))
      ArgsToPromote.insert(PtrArg);
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) 
    return 0;

  return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
}
bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  // TODO: Could probably handle variadic functions.
  if (F.isVarArg() || F.hasStructRetAttr() ||
      AMDGPU::isEntryFunctionCC(F.getCallingConv()))
    return false;

  MDA = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();

  unsigned ReturnNumRegs = 0;
  SmallSet<int, 4> OutArgIndexes;
  SmallVector<Type *, 4> ReturnTypes;
  Type *RetTy = F.getReturnType();
  if (!RetTy->isVoidTy()) {
    ReturnNumRegs = DL->getTypeStoreSize(RetTy) / 4;

    if (ReturnNumRegs >= MaxNumRetRegs)
      return false;

    ReturnTypes.push_back(RetTy);
  }

  SmallVector<Argument *, 4> OutArgs;
  for (Argument &Arg : F.args()) {
    if (isOutArgumentCandidate(Arg)) {
      LLVM_DEBUG(dbgs() << "Found possible out argument " << Arg
                        << " in function " << F.getName() << '\n');
      OutArgs.push_back(&Arg);
    }
  }

  if (OutArgs.empty())
    return false;

  using ReplacementVec = SmallVector<std::pair<Argument *, Value *>, 4>;

  DenseMap<ReturnInst *, ReplacementVec> Replacements;

  SmallVector<ReturnInst *, 4> Returns;
  for (BasicBlock &BB : F) {
    if (ReturnInst *RI = dyn_cast<ReturnInst>(&BB.back()))
      Returns.push_back(RI);
  }

  if (Returns.empty())
    return false;

  bool Changing;

  do {
    Changing = false;

    // Keep retrying if we are able to successfully eliminate an argument. This
    // helps with cases with multiple arguments which may alias, such as in a
    // sincos implemntation. If we have 2 stores to arguments, on the first
    // attempt the MDA query will succeed for the second store but not the
    // first. On the second iteration we've removed that out clobbering argument
    // (by effectively moving it into another function) and will find the second
    // argument is OK to move.
    for (Argument *OutArg : OutArgs) {
      bool ThisReplaceable = true;
      SmallVector<std::pair<ReturnInst *, StoreInst *>, 4> ReplaceableStores;

      Type *ArgTy = OutArg->getType()->getPointerElementType();

      // Skip this argument if converting it will push us over the register
      // count to return limit.

      // TODO: This is an approximation. When legalized this could be more. We
      // can ask TLI for exactly how many.
      unsigned ArgNumRegs = DL->getTypeStoreSize(ArgTy) / 4;
      if (ArgNumRegs + ReturnNumRegs > MaxNumRetRegs)
        continue;

      // An argument is convertible only if all exit blocks are able to replace
      // it.
      for (ReturnInst *RI : Returns) {
        BasicBlock *BB = RI->getParent();

        MemDepResult Q = MDA->getPointerDependencyFrom(MemoryLocation(OutArg),
                                                       true, BB->end(), BB, RI);
        StoreInst *SI = nullptr;
        if (Q.isDef())
          SI = dyn_cast<StoreInst>(Q.getInst());

        if (SI) {
          LLVM_DEBUG(dbgs() << "Found out argument store: " << *SI << '\n');
          ReplaceableStores.emplace_back(RI, SI);
        } else {
          ThisReplaceable = false;
          break;
        }
      }

      if (!ThisReplaceable)
        continue; // Try the next argument candidate.

      for (std::pair<ReturnInst *, StoreInst *> Store : ReplaceableStores) {
        Value *ReplVal = Store.second->getValueOperand();

        auto &ValVec = Replacements[Store.first];
        if (llvm::find_if(ValVec,
              [OutArg](const std::pair<Argument *, Value *> &Entry) {
                 return Entry.first == OutArg;}) != ValVec.end()) {
          LLVM_DEBUG(dbgs()
                     << "Saw multiple out arg stores" << *OutArg << '\n');
          // It is possible to see stores to the same argument multiple times,
          // but we expect these would have been optimized out already.
          ThisReplaceable = false;
          break;
        }

        ValVec.emplace_back(OutArg, ReplVal);
        Store.second->eraseFromParent();
      }

      if (ThisReplaceable) {
        ReturnTypes.push_back(ArgTy);
        OutArgIndexes.insert(OutArg->getArgNo());
        ++NumOutArgumentsReplaced;
        Changing = true;
      }
    }
  } while (Changing);

  if (Replacements.empty())
    return false;

  LLVMContext &Ctx = F.getParent()->getContext();
  StructType *NewRetTy = StructType::create(Ctx, ReturnTypes, F.getName());

  FunctionType *NewFuncTy = FunctionType::get(NewRetTy,
                                              F.getFunctionType()->params(),
                                              F.isVarArg());

  LLVM_DEBUG(dbgs() << "Computed new return type: " << *NewRetTy << '\n');

  Function *NewFunc = Function::Create(NewFuncTy, Function::PrivateLinkage,
                                       F.getName() + ".body");
  F.getParent()->getFunctionList().insert(F.getIterator(), NewFunc);
  NewFunc->copyAttributesFrom(&F);
  NewFunc->setComdat(F.getComdat());

  // We want to preserve the function and param attributes, but need to strip
  // off any return attributes, e.g. zeroext doesn't make sense with a struct.
  NewFunc->stealArgumentListFrom(F);

  AttrBuilder RetAttrs;
  RetAttrs.addAttribute(Attribute::SExt);
  RetAttrs.addAttribute(Attribute::ZExt);
  RetAttrs.addAttribute(Attribute::NoAlias);
  NewFunc->removeAttributes(AttributeList::ReturnIndex, RetAttrs);
  // TODO: How to preserve metadata?

  // Move the body of the function into the new rewritten function, and replace
  // this function with a stub.
  NewFunc->getBasicBlockList().splice(NewFunc->begin(), F.getBasicBlockList());

  for (std::pair<ReturnInst *, ReplacementVec> &Replacement : Replacements) {
    ReturnInst *RI = Replacement.first;
    IRBuilder<> B(RI);
    B.SetCurrentDebugLocation(RI->getDebugLoc());

    int RetIdx = 0;
    Value *NewRetVal = UndefValue::get(NewRetTy);

    Value *RetVal = RI->getReturnValue();
    if (RetVal)
      NewRetVal = B.CreateInsertValue(NewRetVal, RetVal, RetIdx++);

    for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second) {
      Argument *Arg = ReturnPoint.first;
      Value *Val = ReturnPoint.second;
      Type *EltTy = Arg->getType()->getPointerElementType();
      if (Val->getType() != EltTy) {
        Type *EffectiveEltTy = EltTy;
        if (StructType *CT = dyn_cast<StructType>(EltTy)) {
          assert(CT->getNumElements() == 1);
          EffectiveEltTy = CT->getElementType(0);
        }

        if (DL->getTypeSizeInBits(EffectiveEltTy) !=
            DL->getTypeSizeInBits(Val->getType())) {
          assert(isVec3ToVec4Shuffle(EffectiveEltTy, Val->getType()));
          Val = B.CreateShuffleVector(Val, UndefValue::get(Val->getType()),
                                      { 0, 1, 2 });
        }

        Val = B.CreateBitCast(Val, EffectiveEltTy);

        // Re-create single element composite.
        if (EltTy != EffectiveEltTy)
          Val = B.CreateInsertValue(UndefValue::get(EltTy), Val, 0);
      }

      NewRetVal = B.CreateInsertValue(NewRetVal, Val, RetIdx++);
    }

    if (RetVal)
      RI->setOperand(0, NewRetVal);
    else {
      B.CreateRet(NewRetVal);
      RI->eraseFromParent();
    }
  }

  SmallVector<Value *, 16> StubCallArgs;
  for (Argument &Arg : F.args()) {
    if (OutArgIndexes.count(Arg.getArgNo())) {
      // It's easier to preserve the type of the argument list. We rely on
      // DeadArgumentElimination to take care of these.
      StubCallArgs.push_back(UndefValue::get(Arg.getType()));
    } else {
      StubCallArgs.push_back(&Arg);
    }
  }

  BasicBlock *StubBB = BasicBlock::Create(Ctx, "", &F);
  IRBuilder<> B(StubBB);
  CallInst *StubCall = B.CreateCall(NewFunc, StubCallArgs);

  int RetIdx = RetTy->isVoidTy() ? 0 : 1;
  for (Argument &Arg : F.args()) {
    if (!OutArgIndexes.count(Arg.getArgNo()))
      continue;

    PointerType *ArgType = cast<PointerType>(Arg.getType());

    auto *EltTy = ArgType->getElementType();
    unsigned Align = Arg.getParamAlignment();
    if (Align == 0)
      Align = DL->getABITypeAlignment(EltTy);

    Value *Val = B.CreateExtractValue(StubCall, RetIdx++);
    Type *PtrTy = Val->getType()->getPointerTo(ArgType->getAddressSpace());

    // We can peek through bitcasts, so the type may not match.
    Value *PtrVal = B.CreateBitCast(&Arg, PtrTy);

    B.CreateAlignedStore(Val, PtrVal, Align);
  }

  if (!RetTy->isVoidTy()) {
    B.CreateRet(B.CreateExtractValue(StubCall, 0));
  } else {
    B.CreateRetVoid();
  }

  // The function is now a stub we want to inline.
  F.addFnAttr(Attribute::AlwaysInline);

  ++NumOutArgumentFunctionsReplaced;
  return true;
}