bool AMDGPURewriteOutArguments::isOutArgumentCandidate(Argument &Arg) const { const unsigned MaxOutArgSizeBytes = 4 * MaxNumRetRegs; PointerType *ArgTy = dyn_cast<PointerType>(Arg.getType()); // TODO: It might be useful for any out arguments, not just privates. if (!ArgTy || (ArgTy->getAddressSpace() != DL->getAllocaAddrSpace() && !AnyAddressSpace) || Arg.hasByValAttr() || Arg.hasStructRetAttr() || DL->getTypeStoreSize(ArgTy->getPointerElementType()) > MaxOutArgSizeBytes) { return false; } return checkArgumentUses(Arg); }
void Lint::visitCallSite(CallSite CS) { Instruction &I = *CS.getInstruction(); Value *Callee = CS.getCalledValue(); visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr, MemRef::Callee); if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) { Assert(CS.getCallingConv() == F->getCallingConv(), "Undefined behavior: Caller and callee calling convention differ", &I); FunctionType *FT = F->getFunctionType(); unsigned NumActualArgs = CS.arg_size(); Assert(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs : FT->getNumParams() == NumActualArgs, "Undefined behavior: Call argument count mismatches callee " "argument count", &I); Assert(FT->getReturnType() == I.getType(), "Undefined behavior: Call return type mismatches " "callee return type", &I); // Check argument types (in case the callee was casted) and attributes. // TODO: Verify that caller and callee attributes are compatible. Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end(); CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); for (; AI != AE; ++AI) { Value *Actual = *AI; if (PI != PE) { Argument *Formal = &*PI++; Assert(Formal->getType() == Actual->getType(), "Undefined behavior: Call argument type mismatches " "callee parameter type", &I); // Check that noalias arguments don't alias other arguments. This is // not fully precise because we don't know the sizes of the dereferenced // memory regions. if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) if (AI != BI && (*BI)->getType()->isPointerTy()) { AliasResult Result = AA->alias(*AI, *BI); Assert(Result != MustAlias && Result != PartialAlias, "Unusual: noalias argument aliases another argument", &I); } // Check that an sret argument points to valid memory. if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { Type *Ty = cast<PointerType>(Formal->getType())->getElementType(); visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty), DL->getABITypeAlignment(Ty), Ty, MemRef::Read | MemRef::Write); } } } } if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall()) for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); AI != AE; ++AI) { Value *Obj = findValue(*AI, /*OffsetOk=*/true); Assert(!isa<AllocaInst>(Obj), "Undefined behavior: Call with \"tail\" keyword references " "alloca", &I); } if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) switch (II->getIntrinsicID()) { default: break; // TODO: Check more intrinsics case Intrinsic::memcpy: { MemCpyInst *MCI = cast<MemCpyInst>(&I); // TODO: If the size is known, use it. visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize, MCI->getAlignment(), nullptr, MemRef::Write); visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize, MCI->getAlignment(), nullptr, MemRef::Read); // Check that the memcpy arguments don't overlap. The AliasAnalysis API // isn't expressive enough for what we really want to do. Known partial // overlap is not distinguished from the case where nothing is known. uint64_t Size = 0; if (const ConstantInt *Len = dyn_cast<ConstantInt>(findValue(MCI->getLength(), /*OffsetOk=*/false))) if (Len->getValue().isIntN(32)) Size = Len->getValue().getZExtValue(); Assert(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != MustAlias, "Undefined behavior: memcpy source and destination overlap", &I); break; } case Intrinsic::memmove: { MemMoveInst *MMI = cast<MemMoveInst>(&I); // TODO: If the size is known, use it. visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize, MMI->getAlignment(), nullptr, MemRef::Write); visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize, MMI->getAlignment(), nullptr, MemRef::Read); break; } case Intrinsic::memset: { MemSetInst *MSI = cast<MemSetInst>(&I); // TODO: If the size is known, use it. visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize, MSI->getAlignment(), nullptr, MemRef::Write); break; } case Intrinsic::vastart: Assert(I.getParent()->getParent()->isVarArg(), "Undefined behavior: va_start called in a non-varargs function", &I); visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0, nullptr, MemRef::Read | MemRef::Write); break; case Intrinsic::vacopy: visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0, nullptr, MemRef::Write); visitMemoryReference(I, CS.getArgument(1), MemoryLocation::UnknownSize, 0, nullptr, MemRef::Read); break; case Intrinsic::vaend: visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0, nullptr, MemRef::Read | MemRef::Write); break; case Intrinsic::stackrestore: // Stackrestore doesn't read or write memory, but it sets the // stack pointer, which the compiler may read from or write to // at any time, so check it for both readability and writeability. visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0, nullptr, MemRef::Read | MemRef::Write); break; } }
/// PromoteArguments - This method checks the specified function to see if there /// are any promotable arguments and if it is safe to promote the function (for /// example, all callers are direct). If safe to promote some arguments, it /// calls the DoPromotion method. /// CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { Function *F = CGN->getFunction(); // Make sure that it is local to this module. if (!F || !F->hasLocalLinkage()) return nullptr; // Don't promote arguments for variadic functions. Adding, removing, or // changing non-pack parameters can change the classification of pack // parameters. Frontends encode that classification at the call site in the // IR, while in the callee the classification is determined dynamically based // on the number of registers consumed so far. if (F->isVarArg()) return nullptr; // First check: see if there are any pointer arguments! If not, quick exit. SmallVector<Argument*, 16> PointerArgs; for (Argument &I : F->args()) if (I.getType()->isPointerTy()) PointerArgs.push_back(&I); if (PointerArgs.empty()) return nullptr; // Second check: make sure that all callers are direct callers. We can't // transform functions that have indirect callers. Also see if the function // is self-recursive. bool isSelfRecursive = false; for (Use &U : F->uses()) { CallSite CS(U.getUser()); // Must be a direct call. if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr; if (CS.getInstruction()->getParent()->getParent() == F) isSelfRecursive = true; } const DataLayout &DL = F->getParent()->getDataLayout(); // We need to manually construct BasicAA directly in order to disable its use // of other function analyses. BasicAAResult BAR(createLegacyPMBasicAAResult(*this, *F)); // Construct our own AA results for this function. We do this manually to // work around the limitations of the legacy pass manager. AAResults AAR(createLegacyPMAAResults(*this, *F, BAR)); // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. SmallPtrSet<Argument*, 8> ArgsToPromote; SmallPtrSet<Argument*, 8> ByValArgsToTransform; for (unsigned i = 0, e = PointerArgs.size(); i != e; ++i) { Argument *PtrArg = PointerArgs[i]; Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType(); // Replace sret attribute with noalias. This reduces register pressure by // avoiding a register copy. if (PtrArg->hasStructRetAttr()) { unsigned ArgNo = PtrArg->getArgNo(); F->setAttributes( F->getAttributes() .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet) .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias)); for (Use &U : F->uses()) { CallSite CS(U.getUser()); CS.setAttributes( CS.getAttributes() .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet) .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias)); } } // If this is a byval argument, and if the aggregate type is small, just // pass the elements, which is always safe, if the passed value is densely // packed or if we can prove the padding bytes are never accessed. This does // not apply to inalloca. bool isSafeToPromote = PtrArg->hasByValAttr() && (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg)); if (isSafeToPromote) { if (StructType *STy = dyn_cast<StructType>(AgTy)) { if (maxElements > 0 && STy->getNumElements() > maxElements) { DEBUG(dbgs() << "argpromotion disable promoting argument '" << PtrArg->getName() << "' because it would require adding more" << " than " << maxElements << " arguments to the function.\n"); continue; } // If all the elements are single-value types, we can promote it. bool AllSimple = true; for (const auto *EltTy : STy->elements()) { if (!EltTy->isSingleValueType()) { AllSimple = false; break; } } // Safe to transform, don't even bother trying to "promote" it. // Passing the elements as a scalar will allow sroa to hack on // the new alloca we introduce. if (AllSimple) { ByValArgsToTransform.insert(PtrArg); continue; } } } // If the argument is a recursive type and we're in a recursive // function, we could end up infinitely peeling the function argument. if (isSelfRecursive) { if (StructType *STy = dyn_cast<StructType>(AgTy)) { bool RecursiveType = false; for (const auto *EltTy : STy->elements()) { if (EltTy == PtrArg->getType()) { RecursiveType = true; break; } } if (RecursiveType) continue; } } // Otherwise, see if we can promote the pointer to its value. if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR)) ArgsToPromote.insert(PtrArg); } // No promotable pointer arguments. if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) return nullptr; return DoPromotion(F, ArgsToPromote, ByValArgsToTransform); }