bool HexagonRemoveExtendArgs::runOnFunction(Function &F) {
  // For each scalar argument carrying the SExt attribute, the ABI already
  // guarantees the incoming value is sign-extended. Replace every explicit
  // 'sext' user of such an argument with a single SExtInst hoisted to the
  // top of the entry block, which later passes can fold away.
  unsigned Idx = 1; // argument attribute indices are 1-based
  for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
       AI != AE; ++AI, ++Idx) {
    if (!F.getAttributes().hasAttribute(Idx, Attribute::SExt))
      continue;
    Argument *Arg = AI;
    if (isa<PointerType>(Arg->getType()))
      continue;
    for (Instruction::use_iterator UI = Arg->use_begin();
         UI != Arg->use_end();) {
      if (!isa<SExtInst>(*UI)) {
        ++UI;
        continue;
      }
      Instruction *Use = cast<Instruction>(*UI);
      // Advance the iterator *before* building the replacement: the new
      // SExtInst takes Arg as an operand, which registers a fresh use on the
      // very use list we are walking. The original code constructed the
      // instruction first and only then advanced UI, mutating the list
      // mid-iteration.
      ++UI;
      SExtInst *SI = new SExtInst(Arg, Use->getType());
      assert(EVT::getEVT(SI->getType()) == (EVT::getEVT(Use->getType())));
      Use->replaceAllUsesWith(SI);
      // Insert the replacement at the very top of the entry block so it
      // dominates every former user of the old sext.
      Instruction *First = F.getEntryBlock().begin();
      SI->insertBefore(First);
      Use->eraseFromParent();
    }
  }
  return true;
}
SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
  // Without interprocedural analysis, only byval arguments have a knowable
  // size here: the caller materializes the pointee, so its allocation size is
  // the alloc size of the pointee type.
  if (!A.hasByValAttr()) {
    ++ObjectVisitorArgument;
    return unknown();
  }

  PointerType *ArgPtrTy = cast<PointerType>(A.getType());
  APInt AllocSize(IntTyBits,
                  TD->getTypeAllocSize(ArgPtrTy->getElementType()));
  // Offset from the object base is always zero for an argument.
  return std::make_pair(align(AllocSize, A.getParamAlignment()), Zero);
}
// Build the widget row for one command-line argument: a label (with the
// argument's description as tooltip) and, for optional arguments, an
// enable/disable checkbox.
// NOTE(review): this constructor is truncated in the visible chunk — the
// remainder of its body and closing braces lie outside this view.
ArgumentArea::ArgumentArea(QWidget *parent, Argument arg) : KHBox(parent)
{
    prefix = arg.getPrefix();
    QLabel *label = new QLabel(arg.getName(), this);
    label->setToolTip(arg.getDescription());
    if(arg.getOptional() == true){
        // Optional arguments start disabled until the user opts in.
        QCheckBox *enabled = new QCheckBox("", this);
        enabled->setCheckState(Qt::Unchecked);
        if(arg.getType() == Argument::Switch){
            // A switch has no value widget: toggling the checkbox is the whole
            // interaction, so route its state changes straight to the slot.
            connect(enabled, SIGNAL(stateChanged(int)), this, SLOT(switchUpdate(int)));
        }
bool AMDGPURewriteOutArguments::isOutArgumentCandidate(Argument &Arg) const {
  // A candidate out-argument is a pointer, normally into the private (alloca)
  // address space, that is neither byval nor sret, and whose pointee fits in
  // the registers available for return values.
  const unsigned MaxOutArgSizeBytes = 4 * MaxNumRetRegs;

  PointerType *PtrTy = dyn_cast<PointerType>(Arg.getType());
  if (!PtrTy)
    return false;

  // TODO: It might be useful for any out arguments, not just privates.
  const bool BadAddrSpace =
      PtrTy->getAddressSpace() != DL->getAllocaAddrSpace() && !AnyAddressSpace;
  if (BadAddrSpace || Arg.hasByValAttr() || Arg.hasStructRetAttr())
    return false;

  if (DL->getTypeStoreSize(PtrTy->getPointerElementType()) >
      MaxOutArgSizeBytes)
    return false;

  return checkArgumentUses(Arg);
}
vector<const TargetRegisterInfo*> ipaFindUsedReturns(ParameterRegistry& registry, Function& function, const vector<const TargetRegisterInfo*>& returns)
{
	// Entry points have no callers to learn from: conservatively report every
	// candidate return register as used.
	if (function.use_empty())
	{
		if (auto address = md::getVirtualAddress(function))
		{
			if (isEntryPoint(address->getLimitedValue()))
			{
				return returns;
			}
		}
	}
	
	// Otherwise, walk every call site and record which candidate registers are
	// read after the call returns, using the caller's MemorySSA.
	TargetInfo& targetInfo = registry.getTargetInfo();
	SmallPtrSet<MemoryPhi*, 4> visitedPhis;
	vector<const TargetRegisterInfo*> usedRegisters;
	for (auto& use : function.uses())
	{
		auto callInst = dyn_cast<CallInst>(use.getUser());
		if (callInst == nullptr)
		{
			continue;
		}
		
		auto caller = callInst->getParent()->getParent();
		if (caller == &function)
		{
			// TODO: This isn't impossible to compute, just somewhat inconvenient.
			continue;
		}
		
		// Callers receive the register struct as their first parameter.
		Argument* regsArg = caller->arg_begin();
		auto regsPointerType = dyn_cast<PointerType>(regsArg->getType());
		assert(regsPointerType != nullptr && regsPointerType->getTypeAtIndex(int(0))->getStructName() == "struct.x86_regs");
		
		visitedPhis.clear();
		MemorySSA& mssa = *registry.getMemorySSA(*caller);
		findUsedReturns(returns, targetInfo, mssa, visitedPhis, *mssa.getMemoryAccess(callInst), usedRegisters);
	}
	return usedRegisters;
}
bool CallingConvention_x86_64_systemv::analyzeFunction(ParameterRegistry ®istry, CallInformation &callInfo, Function &function) { // TODO: Look at called functions to find hidden parameters/return values if (md::isPrototype(function)) { return false; } TargetInfo& targetInfo = registry.getTargetInfo(); // We always need rip and rsp. callInfo.addParameter(ValueInformation::IntegerRegister, targetInfo.registerNamed("rip")); callInfo.addParameter(ValueInformation::IntegerRegister, targetInfo.registerNamed("rsp")); // Identify register GEPs. // (assume x86 regs as first parameter) assert(function.arg_size() == 1); Argument* regs = function.arg_begin(); auto pointerType = dyn_cast<PointerType>(regs->getType()); assert(pointerType != nullptr && pointerType->getTypeAtIndex(int(0))->getStructName() == "struct.x86_regs"); unordered_multimap<const TargetRegisterInfo*, GetElementPtrInst*> geps; for (auto& use : regs->uses()) { if (GetElementPtrInst* gep = dyn_cast<GetElementPtrInst>(use.getUser())) if (const TargetRegisterInfo* regName = targetInfo.registerInfo(*gep)) { geps.insert({regName, gep}); } } // Look at temporary registers that are read before they are written MemorySSA& mssa = *registry.getMemorySSA(function); for (const char* name : parameterRegisters) { const TargetRegisterInfo* smallReg = targetInfo.registerNamed(name); const TargetRegisterInfo* regInfo = targetInfo.largestOverlappingRegister(*smallReg); auto range = geps.equal_range(regInfo); vector<Instruction*> addresses; for (auto iter = range.first; iter != range.second; ++iter) { addresses.push_back(iter->second); } for (size_t i = 0; i < addresses.size(); ++i) { Instruction* addressInst = addresses[i]; for (auto& use : addressInst->uses()) { if (auto load = dyn_cast<LoadInst>(use.getUser())) { MemoryAccess* parent = mssa.getMemoryAccess(load)->getDefiningAccess(); if (mssa.isLiveOnEntryDef(parent)) { // register argument! 
callInfo.addParameter(ValueInformation::IntegerRegister, regInfo); } } else if (auto cast = dyn_cast<CastInst>(use.getUser())) { if (cast->getType()->isPointerTy()) { addresses.push_back(cast); } } } } } // Does the function refer to values at an offset above the initial rsp value? // Assume that rsp is known to be preserved. auto spRange = geps.equal_range(targetInfo.getStackPointer()); for (auto iter = spRange.first; iter != spRange.second; ++iter) { auto* gep = iter->second; // Find all uses of reference to sp register for (auto& use : gep->uses()) { if (auto load = dyn_cast<LoadInst>(use.getUser())) { // Find uses above +8 (since +0 is the return address) for (auto& use : load->uses()) { ConstantInt* offset = nullptr; if (match(use.get(), m_Add(m_Value(), m_ConstantInt(offset)))) { make_signed<decltype(offset->getLimitedValue())>::type intOffset = offset->getLimitedValue(); if (intOffset > 8) { // memory argument! callInfo.addParameter(ValueInformation::Stack, intOffset); } } } } } } // Are we using return registers? vector<const TargetRegisterInfo*> usedReturns; usedReturns.reserve(2); for (const char* name : returnRegisters) { const TargetRegisterInfo* regInfo = targetInfo.registerNamed(name); auto range = geps.equal_range(regInfo); for (auto iter = range.first; iter != range.second; ++iter) { bool hasStore = any_of(iter->second->use_begin(), iter->second->use_end(), [](Use& use) { return isa<StoreInst>(use.getUser()); }); if (hasStore) { usedReturns.push_back(regInfo); break; } } } for (const TargetRegisterInfo* reg : ipaFindUsedReturns(registry, function, usedReturns)) { // return value! callInfo.addReturn(ValueInformation::IntegerRegister, reg); } return true; }
// Lint checks for a call site: caller/callee agreement (calling convention,
// arity, argument and return types), noalias/sret argument constraints,
// tail-call restrictions, and the memory behavior of a few intrinsics.
void Lint::visitCallSite(CallSite CS) {
  Instruction &I = *CS.getInstruction();
  Value *Callee = CS.getCalledValue();

  // The callee operand is itself a memory reference (e.g. a function pointer).
  visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr,
                       MemRef::Callee);

  if (Function *F = dyn_cast<Function>(findValue(Callee,
                                                 /*OffsetOk=*/false))) {
    Assert(CS.getCallingConv() == F->getCallingConv(),
           "Undefined behavior: Caller and callee calling convention differ",
           &I);

    FunctionType *FT = F->getFunctionType();
    unsigned NumActualArgs = CS.arg_size();

    // Variadic callees accept extra arguments; non-variadic callees must get
    // exactly the declared count.
    Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
                          : FT->getNumParams() == NumActualArgs,
           "Undefined behavior: Call argument count mismatches callee "
           "argument count",
           &I);

    Assert(FT->getReturnType() == I.getType(),
           "Undefined behavior: Call return type mismatches "
           "callee return type",
           &I);

    // Check argument types (in case the callee was casted) and attributes.
    // TODO: Verify that caller and callee attributes are compatible.
    Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
    CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
    for (; AI != AE; ++AI) {
      Value *Actual = *AI;
      if (PI != PE) {
        Argument *Formal = &*PI++;
        Assert(Formal->getType() == Actual->getType(),
               "Undefined behavior: Call argument type mismatches "
               "callee parameter type",
               &I);

        // Check that noalias arguments don't alias other arguments. This is
        // not fully precise because we don't know the sizes of the dereferenced
        // memory regions.
        if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
            if (AI != BI && (*BI)->getType()->isPointerTy()) {
              AliasResult Result = AA->alias(*AI, *BI);
              Assert(Result != MustAlias && Result != PartialAlias,
                     "Unusual: noalias argument aliases another argument", &I);
            }

        // Check that an sret argument points to valid memory.
        if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
          Type *Ty = cast<PointerType>(Formal->getType())->getElementType();
          visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty),
                               DL->getABITypeAlignment(Ty), Ty,
                               MemRef::Read | MemRef::Write);
        }
      }
    }
  }

  // A tail call must not reference the caller's stack frame, so any argument
  // that resolves to an alloca is undefined behavior.
  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
    for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
         AI != AE; ++AI) {
      Value *Obj = findValue(*AI, /*OffsetOk=*/true);
      Assert(!isa<AllocaInst>(Obj),
             "Undefined behavior: Call with \"tail\" keyword references "
             "alloca",
             &I);
    }

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
    switch (II->getIntrinsicID()) {
    default: break;

    // TODO: Check more intrinsics

    case Intrinsic::memcpy: {
      MemCpyInst *MCI = cast<MemCpyInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize,
                           MCI->getAlignment(), nullptr, MemRef::Write);
      visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize,
                           MCI->getAlignment(), nullptr, MemRef::Read);

      // Check that the memcpy arguments don't overlap. The AliasAnalysis API
      // isn't expressive enough for what we really want to do. Known partial
      // overlap is not distinguished from the case where nothing is known.
      // Size is only set when the length is a small (32-bit) constant.
      uint64_t Size = 0;
      if (const ConstantInt *Len =
              dyn_cast<ConstantInt>(findValue(MCI->getLength(),
                                              /*OffsetOk=*/false)))
        if (Len->getValue().isIntN(32))
          Size = Len->getValue().getZExtValue();
      Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
                 MustAlias,
             "Undefined behavior: memcpy source and destination overlap", &I);
      break;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MMI = cast<MemMoveInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize,
                           MMI->getAlignment(), nullptr, MemRef::Write);
      visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize,
                           MMI->getAlignment(), nullptr, MemRef::Read);
      break;
    }
    case Intrinsic::memset: {
      MemSetInst *MSI = cast<MemSetInst>(&I);
      // TODO: If the size is known, use it.
      visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize,
                           MSI->getAlignment(), nullptr, MemRef::Write);
      break;
    }

    case Intrinsic::vastart:
      Assert(I.getParent()->getParent()->isVarArg(),
             "Undefined behavior: va_start called in a non-varargs function",
             &I);

      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize,
                           0, nullptr, MemRef::Read | MemRef::Write);
      break;
    case Intrinsic::vacopy:
      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize,
                           0, nullptr, MemRef::Write);
      visitMemoryReference(I, CS.getArgument(1), MemoryLocation::UnknownSize,
                           0, nullptr, MemRef::Read);
      break;
    case Intrinsic::vaend:
      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize,
                           0, nullptr, MemRef::Read | MemRef::Write);
      break;

    case Intrinsic::stackrestore:
      // Stackrestore doesn't read or write memory, but it sets the
      // stack pointer, which the compiler may read from or write to
      // at any time, so check it for both readability and writeability.
      visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize,
                           0, nullptr, MemRef::Read | MemRef::Write);
      break;
    }
}
/// PromoteArguments - This method checks the specified function to see if there
/// are any promotable arguments and if it is safe to promote the function (for
/// example, all callers are direct). If safe to promote some arguments, it
/// calls the DoPromotion method.
///
CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
  Function *F = CGN->getFunction();

  // Make sure that it is local to this module.
  if (!F || !F->hasLocalLinkage()) return 0;

  // First check: see if there are any pointer arguments! If not, quick exit.
  // Each entry records the argument alongside its 0-based position.
  SmallVector<std::pair<Argument*, unsigned>, 16> PointerArgs;
  unsigned ArgNo = 0;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
       I != E; ++I, ++ArgNo)
    if (I->getType()->isPointerTy())
      PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo));
  if (PointerArgs.empty()) return 0;

  // Second check: make sure that all callers are direct callers. We can't
  // transform functions that have indirect callers.
  if (F->hasAddressTaken())
    return 0;

  // Check to see which arguments are promotable. If an argument is promotable,
  // add it to ArgsToPromote.
  SmallPtrSet<Argument*, 8> ArgsToPromote;
  SmallPtrSet<Argument*, 8> ByValArgsToTransform;
  for (unsigned i = 0; i != PointerArgs.size(); ++i) {
    // +1 because parameter attribute indices are 1-based (0 is the return).
    bool isByVal = F->paramHasAttr(PointerArgs[i].second+1, Attribute::ByVal);

    // If this is a byval argument, and if the aggregate type is small, just
    // pass the elements, which is always safe.
    Argument *PtrArg = PointerArgs[i].first;
    if (isByVal) {
      const Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
      if (const StructType *STy = dyn_cast<StructType>(AgTy)) {
        if (maxElements > 0 && STy->getNumElements() > maxElements) {
          // Too many elements to expand; fall through and still try plain
          // pointer promotion below (this version has no 'continue' here).
          DEBUG(dbgs() << "argpromotion disable promoting argument '"
                << PtrArg->getName() << "' because it would require adding more"
                << " than " << maxElements << " arguments to the function.\n");
        } else {
          // If all the elements are single-value types, we can promote it.
          bool AllSimple = true;
          for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
            if (!STy->getElementType(i)->isSingleValueType()) {
              AllSimple = false;
              break;
            }

          // Safe to transform, don't even bother trying to "promote" it.
          // Passing the elements as a scalar will allow scalarrepl to hack on
          // the new alloca we introduce.
          if (AllSimple) {
            ByValArgsToTransform.insert(PtrArg);
            continue;
          }
        }
      }
    }

    // Otherwise, see if we can promote the pointer to its value.
    if (isSafeToPromoteArgument(PtrArg, isByVal))
      ArgsToPromote.insert(PtrArg);
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
    return 0;

  return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
}
/// PromoteArguments - This method checks the specified function to see if there
/// are any promotable arguments and if it is safe to promote the function (for
/// example, all callers are direct). If safe to promote some arguments, it
/// calls the DoPromotion method.
///
CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
  Function *F = CGN->getFunction();

  // Make sure that it is local to this module.
  if (!F || !F->hasLocalLinkage()) return nullptr;

  // First check: see if there are any pointer arguments! If not, quick exit.
  SmallVector<Argument*, 16> PointerArgs;
  for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
    if (I->getType()->isPointerTy())
      PointerArgs.push_back(I);
  if (PointerArgs.empty()) return nullptr;

  // Second check: make sure that all callers are direct callers. We can't
  // transform functions that have indirect callers. Also see if the function
  // is self-recursive.
  bool isSelfRecursive = false;
  for (Use &U : F->uses()) {
    CallSite CS(U.getUser());
    // Must be a direct call.
    if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr;

    if (CS.getInstruction()->getParent()->getParent() == F)
      isSelfRecursive = true;
  }

  // Don't promote arguments for variadic functions. Adding, removing, or
  // changing non-pack parameters can change the classification of pack
  // parameters. Frontends encode that classification at the call site in the
  // IR, while in the callee the classification is determined dynamically based
  // on the number of registers consumed so far.
  if (F->isVarArg()) return nullptr;

  // Check to see which arguments are promotable. If an argument is promotable,
  // add it to ArgsToPromote.
  SmallPtrSet<Argument*, 8> ArgsToPromote;
  SmallPtrSet<Argument*, 8> ByValArgsToTransform;
  for (unsigned i = 0, e = PointerArgs.size(); i != e; ++i) {
    Argument *PtrArg = PointerArgs[i];
    Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();

    // If this is a byval argument, and if the aggregate type is small, just
    // pass the elements, which is always safe, if the passed value is densely
    // packed or if we can prove the padding bytes are never accessed. This does
    // not apply to inalloca.
    bool isSafeToPromote =
      PtrArg->hasByValAttr() &&
      (isDenselyPacked(AgTy) || !canPaddingBeAccessed(PtrArg));
    if (isSafeToPromote) {
      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
        if (maxElements > 0 && STy->getNumElements() > maxElements) {
          DEBUG(dbgs() << "argpromotion disable promoting argument '"
                << PtrArg->getName() << "' because it would require adding more"
                << " than " << maxElements << " arguments to the function.\n");
          continue;
        }

        // If all the elements are single-value types, we can promote it.
        bool AllSimple = true;
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
          if (!STy->getElementType(i)->isSingleValueType()) {
            AllSimple = false;
            break;
          }
        }

        // Safe to transform, don't even bother trying to "promote" it.
        // Passing the elements as a scalar will allow scalarrepl to hack on
        // the new alloca we introduce.
        if (AllSimple) {
          ByValArgsToTransform.insert(PtrArg);
          continue;
        }
      }
    }

    // If the argument is a recursive type and we're in a recursive
    // function, we could end up infinitely peeling the function argument.
    if (isSelfRecursive) {
      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
        bool RecursiveType = false;
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
          if (STy->getElementType(i) == PtrArg->getType()) {
            RecursiveType = true;
            break;
          }
        }
        if (RecursiveType)
          continue;
      }
    }

    // Otherwise, see if we can promote the pointer to its value.
    if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr()))
      ArgsToPromote.insert(PtrArg);
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
    return nullptr;

  return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
}
/// PromoteArguments - This method checks the specified function to see if there
/// are any promotable arguments and if it is safe to promote the function (for
/// example, all callers are direct). If safe to promote some arguments, it
/// calls the DoPromotion method.
///
CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
  Function *F = CGN->getFunction();

  // Make sure that it is local to this module.
  if (!F || !F->hasLocalLinkage()) return nullptr;

  // Don't promote arguments for variadic functions. Adding, removing, or
  // changing non-pack parameters can change the classification of pack
  // parameters. Frontends encode that classification at the call site in the
  // IR, while in the callee the classification is determined dynamically based
  // on the number of registers consumed so far.
  if (F->isVarArg()) return nullptr;

  // First check: see if there are any pointer arguments! If not, quick exit.
  SmallVector<Argument*, 16> PointerArgs;
  for (Argument &I : F->args())
    if (I.getType()->isPointerTy())
      PointerArgs.push_back(&I);
  if (PointerArgs.empty()) return nullptr;

  // Second check: make sure that all callers are direct callers. We can't
  // transform functions that have indirect callers. Also see if the function
  // is self-recursive.
  bool isSelfRecursive = false;
  for (Use &U : F->uses()) {
    CallSite CS(U.getUser());
    // Must be a direct call.
    if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr;

    if (CS.getInstruction()->getParent()->getParent() == F)
      isSelfRecursive = true;
  }

  const DataLayout &DL = F->getParent()->getDataLayout();

  // We need to manually construct BasicAA directly in order to disable its use
  // of other function analyses.
  BasicAAResult BAR(createLegacyPMBasicAAResult(*this, *F));

  // Construct our own AA results for this function. We do this manually to
  // work around the limitations of the legacy pass manager.
  AAResults AAR(createLegacyPMAAResults(*this, *F, BAR));

  // Check to see which arguments are promotable. If an argument is promotable,
  // add it to ArgsToPromote.
  SmallPtrSet<Argument*, 8> ArgsToPromote;
  SmallPtrSet<Argument*, 8> ByValArgsToTransform;
  for (unsigned i = 0, e = PointerArgs.size(); i != e; ++i) {
    Argument *PtrArg = PointerArgs[i];
    Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();

    // Replace sret attribute with noalias. This reduces register pressure by
    // avoiding a register copy.
    // (ArgNo + 1 because attribute indices are 1-based for parameters.)
    if (PtrArg->hasStructRetAttr()) {
      unsigned ArgNo = PtrArg->getArgNo();
      F->setAttributes(
          F->getAttributes()
              .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet)
              .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
      // Keep every call site's attributes in sync with the new signature.
      for (Use &U : F->uses()) {
        CallSite CS(U.getUser());
        CS.setAttributes(
            CS.getAttributes()
                .removeAttribute(F->getContext(), ArgNo + 1,
                                 Attribute::StructRet)
                .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias));
      }
    }

    // If this is a byval argument, and if the aggregate type is small, just
    // pass the elements, which is always safe, if the passed value is densely
    // packed or if we can prove the padding bytes are never accessed. This does
    // not apply to inalloca.
    bool isSafeToPromote =
        PtrArg->hasByValAttr() &&
        (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
    if (isSafeToPromote) {
      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
        if (maxElements > 0 && STy->getNumElements() > maxElements) {
          DEBUG(dbgs() << "argpromotion disable promoting argument '"
                << PtrArg->getName() << "' because it would require adding more"
                << " than " << maxElements << " arguments to the function.\n");
          continue;
        }

        // If all the elements are single-value types, we can promote it.
        bool AllSimple = true;
        for (const auto *EltTy : STy->elements()) {
          if (!EltTy->isSingleValueType()) {
            AllSimple = false;
            break;
          }
        }

        // Safe to transform, don't even bother trying to "promote" it.
        // Passing the elements as a scalar will allow sroa to hack on
        // the new alloca we introduce.
        if (AllSimple) {
          ByValArgsToTransform.insert(PtrArg);
          continue;
        }
      }
    }

    // If the argument is a recursive type and we're in a recursive
    // function, we could end up infinitely peeling the function argument.
    if (isSelfRecursive) {
      if (StructType *STy = dyn_cast<StructType>(AgTy)) {
        bool RecursiveType = false;
        for (const auto *EltTy : STy->elements()) {
          if (EltTy == PtrArg->getType()) {
            RecursiveType = true;
            break;
          }
        }
        if (RecursiveType)
          continue;
      }
    }

    // Otherwise, see if we can promote the pointer to its value.
    if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR))
      ArgsToPromote.insert(PtrArg);
  }

  // No promotable pointer arguments.
  if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
    return nullptr;

  return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
}
// Clone the body of F into the freshly created NF, mapping F's two parameters
// (class pointer, iteration count) onto NF's parameters. A bitcast is inserted
// so NF's first parameter (declared with a different pointer type) can stand
// in for the original class pointer. Two strategies exist, selected by the
// EXPLICIT_REWRITE preprocessor flag: a hand-rolled per-instruction clone, or
// CloneFunctionWithExistingBBInto.
void HeterotbbTransform::gen_opt_code_per_f (Function* NF, Function* F) {
    // Get the names of the parameters for old function
    Function::arg_iterator FI = F->arg_begin();
    Argument *classname = &*FI;
    FI++;
    Argument *numiters = &*FI;

    // Set the names of the parameters for new function
    Function::arg_iterator DestI = NF->arg_begin();
    DestI->setName(classname->getName());
    Argument *class_name = &(*DestI);
    //second argument
    DestI++;
    DestI->setName(numiters->getName());
    Argument *num_iters = &(*DestI);

#ifdef EXPLICIT_REWRITE
    DenseMap<const Value*, Value *> ValueMap;
#else
    ValueToValueMapTy ValueMap;
#endif

#if EXPLICIT_REWRITE
    //get the old basic block and create a new one
    Function::const_iterator BI = F->begin();
    const BasicBlock &FB = *BI;
    BasicBlock *NFBB = BasicBlock::Create(FB.getContext(), "", NF);
    if (FB.hasName()) {
        NFBB->setName(FB.getName());
        //DEBUG(dbgs()<<FB.getName()<<"\n");
    }
    ValueMap[&FB] = NFBB;

    ValueMap[numiters] = num_iters;
    //must create a new instruction which casts i32* back to the class name
    CastInst *StrucRevCast = CastInst::Create(Instruction::BitCast, class_name, classname->getType(), classname->getName(), NFBB);
    ValueMap[classname] = StrucRevCast;

    // Clone the entry block instruction by instruction, rewriting operands
    // through ValueMap as we go.
    for (BasicBlock::const_iterator II = FB.begin(), IE = FB.end(); II != IE; ++II) {
        Instruction *NFInst = II->clone(/*F->getContext()*/);
        // DEBUG(dbgs()<<*II<<"\n");
        if (II->hasName()) NFInst->setName(II->getName());
        const Instruction *FInst = &(*II);
        rewrite_instruction((Instruction *)FInst, NFInst, ValueMap);
        NFBB->getInstList().push_back(NFInst);
        ValueMap[II] = NFInst;
    }
    BI++;

    // Clone every remaining basic block the same way (BI continues from the
    // second block on purpose).
    for (Function::const_iterator /*BI=F->begin(),*/BE = F->end(); BI != BE; ++BI) {
        const BasicBlock &FBB = *BI;
        BasicBlock *NFBB = BasicBlock::Create(FBB.getContext(), "", NF);
        ValueMap[&FBB] = NFBB;
        if (FBB.hasName()) {
            NFBB->setName(FBB.getName());
            //DEBUG(dbgs()<<NFBB->getName()<<"\n");
        }
        for (BasicBlock::const_iterator II = FBB.begin(), IE = FBB.end(); II != IE; ++II) {
            Instruction *NFInst = II->clone(/*F->getContext()*/);
            if (II->hasName()) NFInst->setName(II->getName());
            const Instruction *FInst = &(*II);
            rewrite_instruction((Instruction *)FInst, NFInst, ValueMap);
            NFBB->getInstList().push_back(NFInst);
            ValueMap[II] = NFInst;
        }
    }
    // Remap the instructions again to take care of forward jumps
    // NOTE(review): ValueMap[V] uses operator[], which default-inserts a null
    // entry for unmapped values; the != NULL check relies on that behavior.
    for (Function::iterator BB = NF->begin(), BE=NF->end(); BB != BE; ++ BB) {
        for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II) {
            int opIdx = 0;
            //DEBUG(dbgs()<<*II<<"\n");
            for (User::op_iterator i = II->op_begin(), e = II->op_end(); i != e; ++i, opIdx++) {
                Value *V = *i;
                if (ValueMap[V] != NULL) {
                    II->setOperand(opIdx, ValueMap[V]);
                }
            }
        }
    }
#else
    // Non-explicit path: seed ValueMap with the entry block, the parameter
    // mapping and the compensating bitcast, then let the cloning utility do
    // the rest.
    Function::const_iterator BI = F->begin();
    const BasicBlock &FB = *BI;
    BasicBlock *NFBB = BasicBlock::Create(FB.getContext(), "", NF);
    if (FB.hasName()) {
        NFBB->setName(FB.getName());
    }
    ValueMap[&FB] = NFBB;
    CastInst *StrucRevCast = CastInst::Create(Instruction::BitCast, class_name, classname->getType(), classname->getName(), NFBB);
    ValueMap[classname] = StrucRevCast;
    ValueMap[numiters] = num_iters;
    CloneFunctionWithExistingBBInto(NF, NFBB, F, ValueMap, "");
#endif
}
// Transform generated HLS functions so that loads feeding a write-channel
// argument become pipelined memory accesses through a companion "<name>MemTrans"
// function. This routine is visibly work-in-progress: it collects the
// candidate loads and builds the new function's signature/argument maps, but
// the body-population loop at the end is still empty.
bool runOnModule(Module &M) override {
    // LOAD: look at each generated function, each a load is followed by writing
    // to a pointer argument with attribute denoting it to be write channel "CHANNELWR"
    // we change the load:
    // 0. replace the original CHANNELWR channel with an address port and a size port (optional)
    // 1. in the absence of burst, replace the load instruction with an address write to
    //    the address port
    // 2. in the presence of burst, move load outside of the involved loop and make one
    //    address write + one size write
    // 3. add in new function to read memory and write to fifo....the same fifo the downstream
    //    guys are reading -- this newly added function would break them into reasonable bursts
    // STORE: let's not do this first
    // 0. replace the original store with an address port and a size port(optional) and a data port
    // 1. in the case of burst, address req get moved outside but actual data is written into the
    //    data port as they get created
    // newly created memory access function
    errs()<<"into func run\n";
    std::vector<Function*> memoryAccessFunctions;
    // top level functions layout pipeline accessed at the end
    std::vector<Function*> pipelineLevelFunctions;
    std::vector<Function*> topLevelFunctions;
    for(auto funcIter = M.begin(); funcIter!=M.end(); funcIter++)
    {
        Function& curFunc = *funcIter;
        if(!curFunc.hasFnAttribute(GENERATEDATTR))
        {
            if(curFunc.hasFnAttribute(TRANSFORMEDATTR))
                topLevelFunctions.push_back(funcIter);
            continue;
        }
        LoopInfo* funcLI=&getAnalysis<LoopInfo>(curFunc);
        // iterate through the basicblocks and see if the loaded value
        // is written to a channel out put -- we do not convert stores
        // the argument involved here are all old arguments
        std::map<Instruction*, Argument*> load2Port;
        std::set<Argument*> addressArg;
        std::set<Argument*> burstedArg;
        std::map<Instruction*, Argument*> store2Port;
        for(auto bbIter = curFunc.begin(); bbIter!= curFunc.end(); bbIter++)
        {
            BasicBlock& curBB = *bbIter;
            for(auto insIter = curBB.begin(); insIter!=curBB.end(); insIter++)
            {
                Instruction& curIns = *insIter;
                if(isa<LoadInst>(curIns))
                {
                    LoadInst& li = cast<LoadInst>(curIns);
                    // we check if the result of this is directly written to an output port
                    // using store
                    int numUser = std::distance(curIns.user_begin(),curIns.user_end());
                    if(numUser==1 )
                    {
                        auto soleUserIter = curIns.user_begin();
                        if(isa<StoreInst>(*soleUserIter))
                        {
                            StoreInst* si = cast<StoreInst>(*soleUserIter);
                            Value* written2 = si->getPointerOperand();
                            if(isa<Argument>(*written2))
                            {
                                Argument& channelArg = cast<Argument>(*written2);
                                // make sure this is wrchannel
                                if(isArgChannel(&channelArg))
                                {
                                    load2Port[&li] = &channelArg;
                                    addressArg.insert(&channelArg);
                                    if(burstAccess&& analyzeLoadBurstable(&li,funcLI))
                                        burstedArg.insert(&channelArg);
                                }
                            }
                        }
                    }
                }
                //FIXME: not doing storeInst
                else if(isa<StoreInst>(curIns))
                {
                }
            }
        }
        // now we have the loadInst which will be converted to pipelined mem access
        // we need to create a bunch of new functions -- we then use these new functions
        // in our new top levels --- after which everything original is deleted
        std::string functionName = curFunc.getName();
        functionName += "MemTrans";
        Type* rtType = curFunc.getReturnType();
        // old to new argument map
        std::map<Argument*,Argument*> oldDataFifoArg2newAddrArg;
        // these are arguments of the newly created function
        std::map<Argument*,Argument*> addressArg2SizeArg;
        std::vector<Type*> paramsType;
        for(auto argIter = curFunc.arg_begin();argIter!=curFunc.arg_end();argIter++)
        {
            Argument* curArg = &cast<Argument>(*argIter);
            paramsType.push_back(curArg->getType());
        }
        // One extra i32* size port per bursted argument.
        // NOTE(review): signed/unsigned comparison — numBurstSize is int while
        // burstedArg.size() is size_t.
        for(int numBurstSize = 0; numBurstSize<burstedArg.size();numBurstSize++)
        {
            paramsType.push_back(PointerType::get(Type::getInt32Ty(M.getContext()),0));
        }
        FunctionType* newFuncType = FunctionType::get(rtType,ArrayRef<Type*>(paramsType),false);
        Constant* newFunc = M.getOrInsertFunction(functionName, newFuncType );
        Function* memTransFunc = cast<Function>(newFunc);
        // First map each old argument to the corresponding new argument…
        auto newArgIter = memTransFunc->arg_begin();
        for(auto oldArgIter = curFunc.arg_begin(); oldArgIter!=curFunc.arg_end(); oldArgIter++, newArgIter++)
        {
            Argument* oldArg = &cast<Argument>(*oldArgIter);
            Argument* newArg = &cast<Argument>(*newArgIter);
            oldDataFifoArg2newAddrArg[oldArg] = newArg;
        }
        // …then pair each trailing size port with the address arg it sizes.
        // Relies on std::set iteration order matching the order size ports
        // were appended above.
        auto burstedArgIter = burstedArg.begin();
        while(newArgIter!=memTransFunc->arg_end())
        {
            Argument* newBurstArg = &cast<Argument>(*newArgIter);
            Argument* originalDataFifoArg = *burstedArgIter;
            Argument* newAddressArg = oldDataFifoArg2newAddrArg[originalDataFifoArg];
            addressArg2SizeArg[newAddressArg] = newBurstArg;
            newArgIter++;
            burstedArgIter++;
        }
        // if bursted access is in a loop, we want to take it out of the loop
        // make it pre-header -- to do this, we associate each loadIns with
        std::map<BasicBlock*,std::vector<Instruction*>*> bb2BurstedLoads;
        for(auto load2PortIter = load2Port.begin(); load2PortIter!=load2Port.end(); load2PortIter++)
        {
            Instruction* ldInst = load2PortIter->first;
            Argument* ldArg = load2PortIter->second;
            BasicBlock* ldParent = ldInst->getParent();
            if(burstedArg.count(ldArg))
            {
                // this load is to be bursted
                // NOTE(review): these vectors are heap-allocated and, per the
                // FIXME below, never released — leaks on every run.
                if(!bb2BurstedLoads.count(ldParent))
                    bb2BurstedLoads[ldParent] = new std::vector<Instruction*>();
                bb2BurstedLoads[ldParent]->push_back(ldInst);
            }
        }
        // now memTransFunc is the new function
        // we will now populate it, we mirror everybb
        std::map<BasicBlock*,BasicBlock*> oldBB2NewBB;
        // also we need a few preheaders to do the burst load
        // NOTE(review): this loop body is empty (oldBB unused) — the mirroring
        // step is unimplemented.
        for(auto bbIter = curFunc.begin(); bbIter!= curFunc.end(); bbIter++)
        {
            BasicBlock& oldBB = *bbIter;
        }
        // FIXME: release bb2BurstedLoads
    }
    return false;
}
// Initialise symbolic values concerning the arguments to the specialisation root function.
// Pesimistically these could point to anything, but we might be able to show they can't alias one another
// (a la C99 restrict pointers).
void LLPEAnalysisPass::createPointerArguments(InlineAttempt* IA) {

  // Try to establish if any incoming pointer arguments are known not to alias
  // the globals, or each other. If so, allocate each a heap slot.

  // argAllocSites[i] holds the allocation sites (or the argument itself, for
  // forced-noalias args) discovered for argument i; empty means unknown.
  std::vector<std::vector<Value*> > argAllocSites;

  Function::arg_iterator AI = IA->F.arg_begin(), AE = IA->F.arg_end();
  for(uint32_t i = 0; AI != AE; ++i, ++AI) {

    argAllocSites.push_back(std::vector<Value*>());

    Argument* A = AI;
    if(A->getType()->isPointerTy()) {

      ImprovedValSetSingle* IVS = cast<ImprovedValSetSingle>(IA->argShadows[i].i.PB);
      if(IVS->SetType == ValSetTypeOldOverdef) {

        std::vector<Value*>& allocs = argAllocSites.back();

        if(forceNoAliasArgs.count(i)) {

          // Not an allocation site, but doesn't matter for this purpose:
          // This will force us to conclude the argument doesn't alias globals
          // or any other arguments.
          allocs.push_back(A);

        }
        else {

          // This will leave argAllocSites empty on failure:
          getAllocSites(A, allocs);

        }

        if(!allocs.empty()) {

          IVS->SetType = ValSetTypePB;

          // Create a new heap location for this argument if it has any non-global constituents.
          // Just note any globals in the alias list.
          bool anyNonGlobals = false;
          for(std::vector<Value*>::iterator it = allocs.begin(), itend = allocs.end(); it != itend; ++it) {

            if(GlobalVariable* GV = dyn_cast<GlobalVariable>(*it)) {

              ShadowGV* SGV = &shadowGlobals[getShadowGlobalIndex(GV)];
              IVS->Values.push_back(ImprovedVal(ShadowValue(SGV), 0));

            }
            else if(!anyNonGlobals) {

              // Create location: only one heap slot per argument, regardless
              // of how many non-global alloc sites it has.
              argStores[i] = ArgStore(heap.size());
              heap.push_back(AllocData());
              heap.back().allocIdx = heap.size() - 1;
              heap.back().isCommitted = false;
              heap.back().allocValue = ShadowValue(&IA->argShadows[i]);
              heap.back().allocType = IA->argShadows[i].getType();
              anyNonGlobals = true;

            }

          }

        }

      }

    }

  }

  // Now for each argument for which we found a bounded set of alloc sites,
  // give it an initial pointer set corresponding to each other arg it may alias.
  for(uint32_t i = 0, ilim = IA->F.arg_size(); i != ilim; ++i) {

    ImprovedValSetSingle* IVS = cast<ImprovedValSetSingle>(IA->argShadows[i].i.PB);
    std::vector<Value*>& allocs = argAllocSites[i];
    if(!allocs.empty()) {

      // Add each pointer argument location this /may/ alias:
      for(uint32_t j = 0, jlim = IA->F.arg_size(); j != jlim; ++j) {

        if(!argAllocSites[j].empty()) {

          std::vector<Value*>& otherallocs = argAllocSites[j];
          for(std::vector<Value*>::iterator it = otherallocs.begin(), itend = otherallocs.end(); it != itend; ++it) {

            if(isa<GlobalVariable>(*it))
              continue;

            if(std::find(allocs.begin(), allocs.end(), *it) != allocs.end()) {

              // i and j share a non-global allocation site, so arg i may alias arg j.
              IVS->Values.push_back(ImprovedVal(ShadowValue(&IA->argShadows[j]), 0));
              break;

            }

          }

        }

      }

    }

  }

}
/// PromoteArguments - This method checks the specified function to see if there /// are any promotable arguments and if it is safe to promote the function (for /// example, all callers are direct). If safe to promote some arguments, it /// calls the DoPromotion method. /// CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { Function *F = CGN->getFunction(); // Make sure that it is local to this module. if (!F || !F->hasLocalLinkage()) return 0; // First check: see if there are any pointer arguments! If not, quick exit. SmallVector<Argument*, 16> PointerArgs; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) if (I->getType()->isPointerTy()) PointerArgs.push_back(I); if (PointerArgs.empty()) return 0; // Second check: make sure that all callers are direct callers. We can't // transform functions that have indirect callers. Also see if the function // is self-recursive. bool isSelfRecursive = false; for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E; ++UI) { CallSite CS(*UI); // Must be a direct call. if (CS.getInstruction() == 0 || !CS.isCallee(UI)) return 0; if (CS.getInstruction()->getParent()->getParent() == F) isSelfRecursive = true; } // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. SmallPtrSet<Argument*, 8> ArgsToPromote; SmallPtrSet<Argument*, 8> ByValArgsToTransform; for (unsigned i = 0, e = PointerArgs.size(); i != e; ++i) { Argument *PtrArg = PointerArgs[i]; Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType(); // If this is a byval argument, and if the aggregate type is small, just // pass the elements, which is always safe. 
if (PtrArg->hasByValAttr()) { if (StructType *STy = dyn_cast<StructType>(AgTy)) { if (maxElements > 0 && STy->getNumElements() > maxElements) { DEBUG(dbgs() << "argpromotion disable promoting argument '" << PtrArg->getName() << "' because it would require adding more" << " than " << maxElements << " arguments to the function.\n"); continue; } // If all the elements are single-value types, we can promote it. bool AllSimple = true; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { if (!STy->getElementType(i)->isSingleValueType()) { AllSimple = false; break; } } // Safe to transform, don't even bother trying to "promote" it. // Passing the elements as a scalar will allow scalarrepl to hack on // the new alloca we introduce. if (AllSimple) { ByValArgsToTransform.insert(PtrArg); continue; } } } // If the argument is a recursive type and we're in a recursive // function, we could end up infinitely peeling the function argument. if (isSelfRecursive) { if (StructType *STy = dyn_cast<StructType>(AgTy)) { bool RecursiveType = false; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { if (STy->getElementType(i) == PtrArg->getType()) { RecursiveType = true; break; } } if (RecursiveType) continue; } } // Otherwise, see if we can promote the pointer to its value. if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValAttr())) ArgsToPromote.insert(PtrArg); } // No promotable pointer arguments. if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) return 0; return DoPromotion(F, ArgsToPromote, ByValArgsToTransform); }
/// Rewrite this function so that values written through pointer "out"
/// arguments are instead returned as members of a struct return value.
/// The original function becomes an always-inline stub that calls the
/// rewritten ".body" function and stores the struct members back through
/// the original pointer arguments. Returns true if the function changed.
bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  // TODO: Could probably handle variadic functions.
  if (F.isVarArg() || F.hasStructRetAttr() ||
      AMDGPU::isEntryFunctionCC(F.getCallingConv()))
    return false;

  MDA = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();

  unsigned ReturnNumRegs = 0;
  SmallSet<int, 4> OutArgIndexes;
  SmallVector<Type *, 4> ReturnTypes;
  Type *RetTy = F.getReturnType();
  if (!RetTy->isVoidTy()) {
    // Estimate the register footprint of the existing return value in
    // 4-byte units; it occupies the first slot of the new struct return.
    ReturnNumRegs = DL->getTypeStoreSize(RetTy) / 4;

    if (ReturnNumRegs >= MaxNumRetRegs)
      return false;

    ReturnTypes.push_back(RetTy);
  }

  // Gather pointer arguments that look like out parameters.
  SmallVector<Argument *, 4> OutArgs;
  for (Argument &Arg : F.args()) {
    if (isOutArgumentCandidate(Arg)) {
      LLVM_DEBUG(dbgs() << "Found possible out argument " << Arg
                 << " in function " << F.getName() << '\n');
      OutArgs.push_back(&Arg);
    }
  }

  if (OutArgs.empty())
    return false;

  using ReplacementVec = SmallVector<std::pair<Argument *, Value *>, 4>;

  // For each return instruction, the (argument, stored value) pairs whose
  // stores we will fold into the new struct return.
  DenseMap<ReturnInst *, ReplacementVec> Replacements;

  SmallVector<ReturnInst *, 4> Returns;
  for (BasicBlock &BB : F) {
    if (ReturnInst *RI = dyn_cast<ReturnInst>(&BB.back()))
      Returns.push_back(RI);
  }

  if (Returns.empty())
    return false;

  bool Changing;

  do {
    Changing = false;

    // Keep retrying if we are able to successfully eliminate an argument. This
    // helps with cases with multiple arguments which may alias, such as in a
    // sincos implementation. If we have 2 stores to arguments, on the first
    // attempt the MDA query will succeed for the second store but not the
    // first. On the second iteration we've removed that out clobbering argument
    // (by effectively moving it into another function) and will find the second
    // argument is OK to move.
    for (Argument *OutArg : OutArgs) {
      bool ThisReplaceable = true;

      SmallVector<std::pair<ReturnInst *, StoreInst *>, 4> ReplaceableStores;

      Type *ArgTy = OutArg->getType()->getPointerElementType();

      // Skip this argument if converting it will push us over the register
      // count to return limit.

      // TODO: This is an approximation. When legalized this could be more. We
      // can ask TLI for exactly how many.
      unsigned ArgNumRegs = DL->getTypeStoreSize(ArgTy) / 4;
      if (ArgNumRegs + ReturnNumRegs > MaxNumRetRegs)
        continue;

      // An argument is convertible only if all exit blocks are able to replace
      // it.
      for (ReturnInst *RI : Returns) {
        BasicBlock *BB = RI->getParent();

        // Walk backwards from the return looking for the store that defines
        // the out argument's memory at function exit.
        MemDepResult Q = MDA->getPointerDependencyFrom(MemoryLocation(OutArg),
                                                       true, BB->end(), BB, RI);
        StoreInst *SI = nullptr;
        if (Q.isDef())
          SI = dyn_cast<StoreInst>(Q.getInst());

        if (SI) {
          LLVM_DEBUG(dbgs() << "Found out argument store: " << *SI << '\n');
          ReplaceableStores.emplace_back(RI, SI);
        } else {
          ThisReplaceable = false;
          break;
        }
      }

      if (!ThisReplaceable)
        continue; // Try the next argument candidate.

      for (std::pair<ReturnInst *, StoreInst *> Store : ReplaceableStores) {
        Value *ReplVal = Store.second->getValueOperand();

        auto &ValVec = Replacements[Store.first];
        // Guard against recording the same argument twice for one return.
        if (llvm::find_if(ValVec,
              [OutArg](const std::pair<Argument *, Value *> &Entry) {
                 return Entry.first == OutArg;}) != ValVec.end()) {
          LLVM_DEBUG(dbgs() << "Saw multiple out arg stores" << *OutArg << '\n');
          // It is possible to see stores to the same argument multiple times,
          // but we expect these would have been optimized out already.
          ThisReplaceable = false;
          break;
        }

        ValVec.emplace_back(OutArg, ReplVal);
        // The stored value now travels in the return struct instead.
        Store.second->eraseFromParent();
      }

      if (ThisReplaceable) {
        ReturnTypes.push_back(ArgTy);
        OutArgIndexes.insert(OutArg->getArgNo());
        ++NumOutArgumentsReplaced;
        Changing = true;
      }
    }
  } while (Changing);

  if (Replacements.empty())
    return false;

  // Build the new struct return type and the ".body" function that carries
  // the original body but returns the struct.
  LLVMContext &Ctx = F.getParent()->getContext();
  StructType *NewRetTy = StructType::create(Ctx, ReturnTypes, F.getName());

  FunctionType *NewFuncTy = FunctionType::get(NewRetTy,
                                              F.getFunctionType()->params(),
                                              F.isVarArg());

  LLVM_DEBUG(dbgs() << "Computed new return type: " << *NewRetTy << '\n');

  Function *NewFunc = Function::Create(NewFuncTy, Function::PrivateLinkage,
                                       F.getName() + ".body");
  F.getParent()->getFunctionList().insert(F.getIterator(), NewFunc);
  NewFunc->copyAttributesFrom(&F);
  NewFunc->setComdat(F.getComdat());

  // We want to preserve the function and param attributes, but need to strip
  // off any return attributes, e.g. zeroext doesn't make sense with a struct.
  NewFunc->stealArgumentListFrom(F);

  AttrBuilder RetAttrs;
  RetAttrs.addAttribute(Attribute::SExt);
  RetAttrs.addAttribute(Attribute::ZExt);
  RetAttrs.addAttribute(Attribute::NoAlias);
  NewFunc->removeAttributes(AttributeList::ReturnIndex, RetAttrs);
  // TODO: How to preserve metadata?

  // Move the body of the function into the new rewritten function, and replace
  // this function with a stub.
  NewFunc->getBasicBlockList().splice(NewFunc->begin(), F.getBasicBlockList());

  // Rewrite every return in the moved body to return the struct instead.
  for (std::pair<ReturnInst *, ReplacementVec> &Replacement : Replacements) {
    ReturnInst *RI = Replacement.first;
    IRBuilder<> B(RI);
    B.SetCurrentDebugLocation(RI->getDebugLoc());

    int RetIdx = 0;
    Value *NewRetVal = UndefValue::get(NewRetTy);

    // The original return value, when present, occupies struct slot 0.
    Value *RetVal = RI->getReturnValue();
    if (RetVal)
      NewRetVal = B.CreateInsertValue(NewRetVal, RetVal, RetIdx++);

    for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second) {
      Argument *Arg = ReturnPoint.first;
      Value *Val = ReturnPoint.second;

      Type *EltTy = Arg->getType()->getPointerElementType();
      if (Val->getType() != EltTy) {
        // The store may have gone through a bitcast, so the value type can
        // legitimately differ from the argument's pointee type.
        Type *EffectiveEltTy = EltTy;
        if (StructType *CT = dyn_cast<StructType>(EltTy)) {
          assert(CT->getNumElements() == 1);
          EffectiveEltTy = CT->getElementType(0);
        }

        if (DL->getTypeSizeInBits(EffectiveEltTy) !=
            DL->getTypeSizeInBits(Val->getType())) {
          // Only a vec3-stored-as-vec4 mismatch is expected here; narrow it
          // back down with a shuffle.
          assert(isVec3ToVec4Shuffle(EffectiveEltTy, Val->getType()));
          Val = B.CreateShuffleVector(Val, UndefValue::get(Val->getType()),
                                      { 0, 1, 2 });
        }

        Val = B.CreateBitCast(Val, EffectiveEltTy);

        // Re-create single element composite.
        if (EltTy != EffectiveEltTy)
          Val = B.CreateInsertValue(UndefValue::get(EltTy), Val, 0);
      }

      NewRetVal = B.CreateInsertValue(NewRetVal, Val, RetIdx++);
    }

    if (RetVal)
      RI->setOperand(0, NewRetVal);
    else {
      B.CreateRet(NewRetVal);
      RI->eraseFromParent();
    }
  }

  // Build the stub's call arguments: replaced out arguments become undef
  // placeholders (the stub writes the results itself below).
  SmallVector<Value *, 16> StubCallArgs;
  for (Argument &Arg : F.args()) {
    if (OutArgIndexes.count(Arg.getArgNo())) {
      // It's easier to preserve the type of the argument list. We rely on
      // DeadArgumentElimination to take care of these.
      StubCallArgs.push_back(UndefValue::get(Arg.getType()));
    } else {
      StubCallArgs.push_back(&Arg);
    }
  }

  // F is now empty; give it a single block that calls the body function.
  BasicBlock *StubBB = BasicBlock::Create(Ctx, "", &F);
  IRBuilder<> B(StubBB);
  CallInst *StubCall = B.CreateCall(NewFunc, StubCallArgs);

  // Slot 0 holds the original return value when there is one, so the out
  // argument extraction starts at 1 in that case.
  int RetIdx = RetTy->isVoidTy() ? 0 : 1;
  for (Argument &Arg : F.args()) {
    if (!OutArgIndexes.count(Arg.getArgNo()))
      continue;

    PointerType *ArgType = cast<PointerType>(Arg.getType());

    auto *EltTy = ArgType->getElementType();
    unsigned Align = Arg.getParamAlignment();
    if (Align == 0)
      Align = DL->getABITypeAlignment(EltTy);

    Value *Val = B.CreateExtractValue(StubCall, RetIdx++);
    Type *PtrTy = Val->getType()->getPointerTo(ArgType->getAddressSpace());

    // We can peek through bitcasts, so the type may not match.
    Value *PtrVal = B.CreateBitCast(&Arg, PtrTy);
    B.CreateAlignedStore(Val, PtrVal, Align);
  }

  if (!RetTy->isVoidTy()) {
    B.CreateRet(B.CreateExtractValue(StubCall, 0));
  } else {
    B.CreateRetVoid();
  }

  // The function is now a stub we want to inline.
  F.addFnAttr(Attribute::AlwaysInline);

  ++NumOutArgumentFunctionsReplaced;
  return true;
}