bool IRTranslator::translateCall(const CallInst &CI) {
  auto TII = MIRBuilder.getMF().getTarget().getIntrinsicInfo();
  const Function &F = *CI.getCalledFunction();
  Intrinsic::ID ID = F.getIntrinsicID();
  if (TII && ID == Intrinsic::not_intrinsic)
    ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(&F));

  assert(ID != Intrinsic::not_intrinsic && "FIXME: support real calls");

  // Need types (starting with return) & args.
  SmallVector<LLT, 4> Tys;
  Tys.emplace_back(*CI.getType());
  for (auto &Arg : CI.arg_operands())
    Tys.emplace_back(*Arg->getType());

  unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI);
  MachineInstrBuilder MIB =
      MIRBuilder.buildIntrinsic(Tys, ID, Res, !CI.doesNotAccessMemory());

  for (auto &Arg : CI.arg_operands()) {
    if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg))
      MIB.addImm(CI->getSExtValue());
    else
      MIB.addUse(getOrCreateVReg(*Arg));
  }
  return true;
}
/**
* @brief Tries to find the functions that can be called by an indirect call.
*
* @par Preconditions
*  - @a call is a call that calls some function indirectly.
*
* @param[in] call We try to find functions for this indirect call.
* @param[in] funcsToCheck We look for indirectly callable functions only
*                         among these functions.
*
* @return Found functions that can be called indirectly.
*/
FuncSet IndirectlyCalledFuncsAnalysis::getFuncsForIndirectCall(
    const CallInst &call, const FuncVec &funcsToCheck) {
  assert(isIndirectCall(call) && "Expected an indirect call.");

  FuncSet result;
  Type *callReturnType = call.getType();
  for (Function *func : funcsToCheck) {
    // The return types have to match.
    if (func->getReturnType() != callReturnType) {
      continue;
    }

    // For non-variadic candidates, the arguments have to match as well.
    if (!func->isVarArg()) {
      if (!hasEqArgsAndParams(call, *func)) {
        continue;
      }
    }

    result.insert(func);
  }
  return result;
}
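// Usage sketch (not part of the original analysis): one plausible way to
// build the candidate set is to collect every address-taken function in the
// module. FuncVec/FuncSet are assumed to be the analysis' own container
// typedefs, resolveIndirectCall is a hypothetical helper, and the method is
// assumed to be callable statically; otherwise go through an analysis
// instance.
static FuncSet resolveIndirectCall(const CallInst &call, Module &M) {
  FuncVec candidates;
  for (Function &F : M) {
    // Only address-taken functions can plausibly be the target of an
    // indirect call.
    if (F.hasAddressTaken())
      candidates.push_back(&F);
  }
  return IndirectlyCalledFuncsAnalysis::getFuncsForIndirectCall(call,
                                                                candidates);
}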
void replaceAllCallsWith(Value *OldFunc, Value *NewFunc) {
  for (Value::use_iterator I = OldFunc->use_begin(), E = OldFunc->use_end();
       I != E;) {
    // Advance before we potentially erase the user; erasing the call removes
    // a use of OldFunc and would otherwise invalidate the use iterator.
    User *U = *I++;
    if (CallInst *call = dyn_cast<CallInst>(U)) {
      std::vector<Value *> args;
      for (unsigned i = 0; i < call->getNumArgOperands(); i++) {
        args.push_back(call->getArgOperand(i));
      }
      ArrayRef<Value *> Args(args);

      CallInst *newCall = CallInst::Create(NewFunc, Args);
      if (newCall->getType() != call->getType()) {
        if (!call->use_empty()) {
          errs() << "Cannot handle usage of non-matching return types for "
                 << *call->getType() << " and " << *newCall->getType()
                 << "\n";
        }
        newCall->insertBefore(call);
        call->replaceAllUsesWith(newCall);
        call->eraseFromParent();
      } else {
        ReplaceInstWithInst(call, newCall);
      }
    } else {
      U->print(errs());
      errs() << "\n";
      exit(1);
    }
  }
}
bool CallLowering::lowerCall(
    MachineIRBuilder &MIRBuilder, const CallInst &CI, unsigned ResReg,
    ArrayRef<unsigned> ArgRegs, std::function<unsigned()> GetCalleeReg) const {
  auto &DL = CI.getParent()->getParent()->getParent()->getDataLayout();

  // First step is to marshall all the function's parameters into the correct
  // physregs and memory locations. Gather the sequence of argument types that
  // we'll pass to the assigner function.
  SmallVector<ArgInfo, 8> OrigArgs;
  unsigned i = 0;
  for (auto &Arg : CI.arg_operands()) {
    ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{}};
    setArgFlags(OrigArg, i + 1, DL, CI);
    OrigArgs.push_back(OrigArg);
    ++i;
  }

  MachineOperand Callee = MachineOperand::CreateImm(0);
  if (Function *F = CI.getCalledFunction())
    Callee = MachineOperand::CreateGA(F, 0);
  else
    Callee = MachineOperand::CreateReg(GetCalleeReg(), false);

  ArgInfo OrigRet{ResReg, CI.getType(), ISD::ArgFlagsTy{}};
  if (!OrigRet.Ty->isVoidTy())
    setArgFlags(OrigRet, AttributeSet::ReturnIndex, DL, CI);

  return lowerCall(MIRBuilder, Callee, OrigRet, OrigArgs);
}
// -- handle call instruction --
void UnsafeTypeCastingCheck::handleCallInstruction (Instruction *inst) {
  CallInst *cinst = dyn_cast<CallInst>(inst);
  if (cinst == NULL)
    utccAbort("handleCallInstruction cannot process a non-call instruction");

  Type *ctype = cinst->getType();
  string func_name = cinst->getCalledFunction()->getName().str();

  if (func_name.compare("fabs") == 0 ||
      func_name.compare("sqrt") == 0 ||
      func_name.compare("exp") == 0) {
    setExprType(cinst, NFP_UT);
  }
  else if (func_name.compare("ceil") == 0 ||
           func_name.compare("floor") == 0) {
    assert(inst->getNumOperands() == 2);
    Value *arg = inst->getOperand(0);
    UTCC_TYPE argt = queryExprType(arg);
    setExprType(cinst, argt);
  }
  else if (func_name.compare("max") == 0) {
    assert(inst->getNumOperands() == 3);
    Value *op0 = inst->getOperand(0);
    Value *op1 = inst->getOperand(1);
    UTCC_TYPE t0 = queryExprType(op0);
    UTCC_TYPE t1 = queryExprType(op1);
    // max is non-negative if either operand is non-negative.
    if (t0 == NFP_UT || t1 == NFP_UT)
      setExprType(cinst, NFP_UT);
    else
      setExprType(cinst, FP_UT);
  }
  else if (func_name.compare("min") == 0) {
    assert(inst->getNumOperands() == 3);
    Value *op0 = inst->getOperand(0);
    Value *op1 = inst->getOperand(1);
    UTCC_TYPE t0 = queryExprType(op0);
    UTCC_TYPE t1 = queryExprType(op1);
    // min is non-negative only if both operands are non-negative.
    if (t0 == NFP_UT && t1 == NFP_UT)
      setExprType(cinst, NFP_UT);
    else
      setExprType(cinst, FP_UT);
  }
  else if (func_name.compare("claimNonNegativeInt") == 0 ||
           func_name.compare("claimNonNegativeUint") == 0) {
    assert(inst->getNumOperands() == 2);
    Value *arg = inst->getOperand(0);
    setPointedType(arg, NINT_UT);
  }
  else if (func_name.compare("claimNonNegativeFP32") == 0 ||
           func_name.compare("claimNonNegativeFP64") == 0) {
    assert(inst->getNumOperands() == 2);
    Value *arg = inst->getOperand(0);
    setPointedType(arg, NFP_UT);
  }
  else setExprType(cinst, llvmT2utccT(ctype, cinst));
}
/// \brief Check that a call has a unary float signature.
/// It checks the following:
/// a) the call has a single argument
/// b) the argument is of floating point type
/// c) the call instruction's type and the argument type are the same
/// d) the call only reads memory.
/// If all these conditions are met it returns ValidIntrinsicID,
/// otherwise not_intrinsic.
Intrinsic::ID llvm::checkUnaryFloatSignature(const CallInst &I,
                                             Intrinsic::ID ValidIntrinsicID) {
  if (I.getNumArgOperands() != 1 ||
      !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
      I.getType() != I.getArgOperand(0)->getType() || !I.onlyReadsMemory())
    return Intrinsic::not_intrinsic;

  return ValidIntrinsicID;
}
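// Typical use, as a sketch (following how the vectorizer utilities of this
// era mapped libcalls to intrinsics): after resolving the called function via
// TargetLibraryInfo, each unary libm function is vetted with
// checkUnaryFloatSignature before the call is treated as the corresponding
// intrinsic. The switch below is an abbreviated illustration, not the full
// mapping.
static Intrinsic::ID mapLibCallToIntrinsic(const CallInst &CI,
                                           LibFunc::Func Func) {
  switch (Func) {
  case LibFunc::sqrt:
  case LibFunc::sqrtf:
  case LibFunc::sqrtl:
    return checkUnaryFloatSignature(CI, Intrinsic::sqrt);
  case LibFunc::exp:
  case LibFunc::expf:
  case LibFunc::expl:
    return checkUnaryFloatSignature(CI, Intrinsic::exp);
  default:
    return Intrinsic::not_intrinsic;
  }
}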
/**
* removeUndefCalls -- remove calls with undef function
*
* These are irrelevant to the code, so may be removed completely.
*/
void FunctionStaticSlicer::removeUndefCalls(ModulePass *MP, Function &F) {
  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E;) {
    CallInst *CI = dyn_cast<CallInst>(&*I);
    ++I;
    if (CI && isa<UndefValue>(CI->getCalledValue())) {
      CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
      CI->eraseFromParent();
    }
  }
}
static bool ExpandOpForIntSize(Module *M, unsigned Bits, bool Mul) {
  IntegerType *IntTy = IntegerType::get(M->getContext(), Bits);
  SmallVector<Type *, 1> Types;
  Types.push_back(IntTy);
  Intrinsic::ID ID = (Mul ? Intrinsic::umul_with_overflow
                          : Intrinsic::uadd_with_overflow);
  std::string Name = Intrinsic::getName(ID, Types);
  Function *Intrinsic = M->getFunction(Name);
  if (!Intrinsic)
    return false;
  for (Value::use_iterator CallIter = Intrinsic->use_begin(),
         E = Intrinsic->use_end(); CallIter != E; ) {
    CallInst *Call = dyn_cast<CallInst>(*CallIter++);
    if (!Call) {
      report_fatal_error("ExpandArithWithOverflow: Taking the address of a "
                         "*.with.overflow intrinsic is not allowed");
    }
    Value *VariableArg;
    ConstantInt *ConstantArg;
    if (ConstantInt *C = dyn_cast<ConstantInt>(Call->getArgOperand(0))) {
      VariableArg = Call->getArgOperand(1);
      ConstantArg = C;
    } else if (ConstantInt *C = dyn_cast<ConstantInt>(Call->getArgOperand(1))) {
      VariableArg = Call->getArgOperand(0);
      ConstantArg = C;
    } else {
      errs() << "Use: " << *Call << "\n";
      report_fatal_error("ExpandArithWithOverflow: At least one argument of "
                         "*.with.overflow must be a constant");
    }

    Value *ArithResult = BinaryOperator::Create(
        (Mul ? Instruction::Mul : Instruction::Add), VariableArg, ConstantArg,
        Call->getName() + ".arith", Call);

    uint64_t ArgMax;
    if (Mul) {
      ArgMax = UintTypeMax(Bits) / ConstantArg->getZExtValue();
    } else {
      ArgMax = UintTypeMax(Bits) - ConstantArg->getZExtValue();
    }
    Value *OverflowResult = new ICmpInst(
        Call, CmpInst::ICMP_UGT, VariableArg, ConstantInt::get(IntTy, ArgMax),
        Call->getName() + ".overflow");

    // Construct the struct result.
    Value *NewStruct = UndefValue::get(Call->getType());
    NewStruct = CreateInsertValue(NewStruct, 0, ArithResult, Call);
    NewStruct = CreateInsertValue(NewStruct, 1, OverflowResult, Call);
    Call->replaceAllUsesWith(NewStruct);
    Call->eraseFromParent();
  }
  Intrinsic->eraseFromParent();
  return true;
}
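// For reference, the overflow predicate the ArgMax comparison above relies
// on, modeled in plain C++ for one concrete width (these helpers are
// illustrative only, not part of the pass): with one operand a compile-time
// constant c, a + c wraps exactly when a > UINT32_MAX - c, and a * c wraps
// exactly when a > UINT32_MAX / c (for c != 0), which is the ICMP_UGT against
// ArgMax emitted above.
#include <cstdint>
#include <limits>
static bool uaddOverflows(uint32_t a, uint32_t c) {
  return a > std::numeric_limits<uint32_t>::max() - c;
}
static bool umulOverflows(uint32_t a, uint32_t c) {
  return c != 0 && a > std::numeric_limits<uint32_t>::max() / c;
}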
static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI,
                                       const TargetTransformInfo *TTI) {
  bool Changed = false;

  Function::iterator CurrBB;
  for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
    CurrBB = BB++;

    for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end();
         II != IE; ++II) {
      CallInst *Call = dyn_cast<CallInst>(&*II);
      Function *CalledFunc;

      if (!Call || !(CalledFunc = Call->getCalledFunction()))
        continue;

      if (Call->isNoBuiltin())
        continue;

      // Skip if function either has local linkage or is not a known library
      // function.
      LibFunc LF;
      if (CalledFunc->hasLocalLinkage() ||
          !TLI->getLibFunc(*CalledFunc, LF) || !TLI->has(LF))
        continue;

      switch (LF) {
      case LibFunc_sqrtf:
      case LibFunc_sqrt:
        if (TTI->haveFastSqrt(Call->getType()) &&
            optimizeSQRT(Call, CalledFunc, *CurrBB, BB, TTI))
          break;
        continue;
      default:
        continue;
      }

      Changed = true;
      break;
    }
  }

  return Changed;
}
bool IRTranslator::translateMemcpy(const CallInst &CI) {
  LLT SizeTy{*CI.getArgOperand(2)->getType(), *DL};
  if (cast<PointerType>(CI.getArgOperand(0)->getType())->getAddressSpace() !=
          0 ||
      cast<PointerType>(CI.getArgOperand(1)->getType())->getAddressSpace() !=
          0 ||
      SizeTy.getSizeInBits() != DL->getPointerSizeInBits(0))
    return false;

  SmallVector<CallLowering::ArgInfo, 8> Args;
  for (int i = 0; i < 3; ++i) {
    const auto &Arg = CI.getArgOperand(i);
    Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType());
  }

  MachineOperand Callee = MachineOperand::CreateES("memcpy");

  return CLI->lowerCall(MIRBuilder, Callee,
                        CallLowering::ArgInfo(0, CI.getType()), Args);
}
bool PartiallyInlineLibCalls::runOnFunction(Function &F) {
  bool Changed = false;
  Function::iterator CurrBB;
  TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
  const TargetTransformInfo *TTI = &getAnalysis<TargetTransformInfo>();
  for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE;) {
    CurrBB = BB++;

    for (BasicBlock::iterator II = CurrBB->begin(), IE = CurrBB->end();
         II != IE; ++II) {
      CallInst *Call = dyn_cast<CallInst>(&*II);
      Function *CalledFunc;

      if (!Call || !(CalledFunc = Call->getCalledFunction()))
        continue;

      // Skip if function either has local linkage or is not a known library
      // function.
      LibFunc::Func LibFunc;
      if (CalledFunc->hasLocalLinkage() || !CalledFunc->hasName() ||
          !TLI->getLibFunc(CalledFunc->getName(), LibFunc))
        continue;

      switch (LibFunc) {
      case LibFunc::sqrtf:
      case LibFunc::sqrt:
        if (TTI->haveFastSqrt(Call->getType()) &&
            optimizeSQRT(Call, CalledFunc, *CurrBB, BB))
          break;
        continue;
      default:
        continue;
      }

      Changed = true;
      break;
    }
  }
  return Changed;
}
static void ThunkGToF(Function *F, Function *G) {
  Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
                                    G->getParent());
  BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);

  std::vector<Value *> Args;
  unsigned i = 0;
  const FunctionType *FFTy = F->getFunctionType();
  for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
       AI != AE; ++AI) {
    if (FFTy->getParamType(i) == AI->getType())
      Args.push_back(AI);
    else {
      Value *BCI = new BitCastInst(AI, FFTy->getParamType(i), "", BB);
      Args.push_back(BCI);
    }
    ++i;
  }

  CallInst *CI = CallInst::Create(F, Args.begin(), Args.end(), "", BB);
  CI->setTailCall();
  CI->setCallingConv(F->getCallingConv());
  if (NewG->getReturnType() == Type::getVoidTy(F->getContext())) {
    ReturnInst::Create(F->getContext(), BB);
  } else if (CI->getType() != NewG->getReturnType()) {
    Value *BCI = new BitCastInst(CI, NewG->getReturnType(), "", BB);
    ReturnInst::Create(F->getContext(), BCI, BB);
  } else {
    ReturnInst::Create(F->getContext(), CI, BB);
  }

  NewG->copyAttributesFrom(G);
  NewG->takeName(G);
  G->replaceAllUsesWith(NewG);
  G->eraseFromParent();

  // TODO: look at direct callers to G and make them all direct callers to F.
}
void AtomicVisitor::replaceInstructionWithIntrinsicCall(
    Instruction &I, const NaCl::AtomicIntrinsics::AtomicIntrinsic *Intrinsic,
    Type *DstType, Type *OverloadedType, ArrayRef<Value *> Args) {
  std::string Name(I.getName());
  Function *F = Intrinsic->getDeclaration(&M);
  CallInst *Call = CallInst::Create(F, Args, "", &I);
  Call->setDebugLoc(I.getDebugLoc());
  Instruction *Res = Call;

  assert((I.getType()->isStructTy() == isa<AtomicCmpXchgInst>(&I)) &&
         "cmpxchg returns a struct, and other instructions don't");
  if (auto S = dyn_cast<StructType>(I.getType())) {
    assert(S->getNumElements() == 2 &&
           "cmpxchg returns a struct with two elements");
    assert(S->getElementType(0) == DstType &&
           "cmpxchg struct's first member should be the value type");
    assert(S->getElementType(1) == Type::getInt1Ty(C) &&
           "cmpxchg struct's second member should be the success flag");
    // Recreate struct { T value, i1 success } after the call.
    auto Success = CmpInst::Create(
        Instruction::ICmp, CmpInst::ICMP_EQ, Res,
        cast<AtomicCmpXchgInst>(&I)->getCompareOperand(), "success", &I);
    Res = InsertValueInst::Create(
        InsertValueInst::Create(UndefValue::get(S), Res, 0,
                                Name + ".insert.value", &I),
        Success, 1, Name + ".insert.success", &I);
  } else if (!Call->getType()->isVoidTy() && DstType != OverloadedType) {
    // The call returns a value which needs to be cast to a non-integer.
    Res = createCast(I, Call, DstType, Name + ".cast");
    Res->setDebugLoc(I.getDebugLoc());
  }

  I.replaceAllUsesWith(Res);
  I.eraseFromParent();
  Call->setName(Name);
  ModifiedModule = true;
}
bool NVVMReflect::runOnModule(Module &M) {
  if (!NVVMReflectEnabled)
    return false;

  setVarMap();

  ReflectFunction = M.getFunction(NVVM_REFLECT_FUNCTION);

  // If the reflect function is not used, then there will be
  // no entry in the module.
  if (ReflectFunction == 0)
    return false;

  // Validate _reflect function
  assert(ReflectFunction->isDeclaration() &&
         "_reflect function should not have a body");
  assert(ReflectFunction->getReturnType()->isIntegerTy() &&
         "_reflect's return type should be integer");

  std::vector<Instruction *> ToRemove;

  // Go through the uses of ReflectFunction in this Function.
  // Each of them should be a CallInst with a ConstantArray argument.
  // First validate that. If the c-string corresponding to the
  // ConstantArray can be found successfully, see if it can be
  // found in VarMap. If so, replace the uses of CallInst with the
  // value found in VarMap. If not, replace the use with value 0.
  for (User *U : ReflectFunction->users()) {
    assert(isa<CallInst>(U) && "Only a call instruction can use _reflect");
    CallInst *Reflect = cast<CallInst>(U);

    assert((Reflect->getNumOperands() == 2) &&
           "Only one operand expected for _reflect function");
    // In cuda, we will have an extra constant-to-generic conversion of
    // the string.
    const Value *conv = Reflect->getArgOperand(0);
    assert(isa<CallInst>(conv) && "Expected a const-to-gen conversion");
    const CallInst *ConvCall = cast<CallInst>(conv);
    const Value *str = ConvCall->getArgOperand(0);
    assert(isa<ConstantExpr>(str) &&
           "Format of _reflect function not recognized");
    const ConstantExpr *GEP = cast<ConstantExpr>(str);

    const Value *Sym = GEP->getOperand(0);
    assert(isa<Constant>(Sym) && "Format of _reflect function not recognized");

    const Constant *SymStr = cast<Constant>(Sym);

    assert(isa<ConstantDataSequential>(SymStr->getOperand(0)) &&
           "Format of _reflect function not recognized");

    assert(cast<ConstantDataSequential>(SymStr->getOperand(0))->isCString() &&
           "Format of _reflect function not recognized");

    std::string ReflectArg =
        cast<ConstantDataSequential>(SymStr->getOperand(0))->getAsString();

    ReflectArg = ReflectArg.substr(0, ReflectArg.size() - 1);
    DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");

    int ReflectVal = 0; // The default value is 0
    if (VarMap.find(ReflectArg) != VarMap.end()) {
      ReflectVal = VarMap[ReflectArg];
    }
    Reflect->replaceAllUsesWith(
        ConstantInt::get(Reflect->getType(), ReflectVal));
    ToRemove.push_back(Reflect);
  }
  if (ToRemove.size() == 0)
    return false;

  for (unsigned i = 0, e = ToRemove.size(); i != e; ++i)
    ToRemove[i]->eraseFromParent();
  return true;
}
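// For orientation, a source-level sketch (not from the pass) of the pattern
// NVVMReflect folds; the exact __nvvm_reflect prototype in real CUDA headers
// may differ from this assumed declaration.
extern "C" int __nvvm_reflect(const char *);

int useFlushToZero() {
  // If VarMap maps "__CUDA_FTZ" to 1 when the pass runs, the call below is
  // replaced by the constant 1; names missing from VarMap fold to the
  // default 0.
  return __nvvm_reflect("__CUDA_FTZ");
}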
/// run - Start execution with the specified function and arguments.
///
GenericValue JIT::runFunction(Function *F,
                              const std::vector<GenericValue> &ArgValues) {
  assert(F && "Function *F was null at entry to run()");

  void *FPtr = getPointerToFunction(F);
  assert(FPtr && "Pointer to fn's code was null after getPointerToFunction");
  const FunctionType *FTy = F->getFunctionType();
  const Type *RetTy = FTy->getReturnType();

  assert((FTy->getNumParams() == ArgValues.size() ||
          (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) &&
         "Wrong number of arguments passed into function!");
  assert(FTy->getNumParams() == ArgValues.size() &&
         "This doesn't support passing arguments through varargs (yet)!");

  // Handle some common cases first. These cases correspond to common `main'
  // prototypes.
  if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) {
    switch (ArgValues.size()) {
    case 3:
      if (FTy->getParamType(0)->isIntegerTy(32) &&
          FTy->getParamType(1)->isPointerTy() &&
          FTy->getParamType(2)->isPointerTy()) {
        int (*PF)(int, char **, const char **) =
          (int(*)(int, char **, const char **))(intptr_t)FPtr;

        // Call the function.
        GenericValue rv;
        rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
                                 (char **)GVTOP(ArgValues[1]),
                                 (const char **)GVTOP(ArgValues[2])));
        return rv;
      }
      break;
    case 2:
      if (FTy->getParamType(0)->isIntegerTy(32) &&
          FTy->getParamType(1)->isPointerTy()) {
        int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr;

        // Call the function.
        GenericValue rv;
        rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
                                 (char **)GVTOP(ArgValues[1])));
        return rv;
      }
      break;
    case 1:
      if (FTy->getNumParams() == 1 &&
          FTy->getParamType(0)->isIntegerTy(32)) {
        GenericValue rv;
        int (*PF)(int) = (int(*)(int))(intptr_t)FPtr;
        rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue()));
        return rv;
      }
      break;
    }
  }

  // Handle cases where no arguments are passed first.
  if (ArgValues.empty()) {
    GenericValue rv;
    switch (RetTy->getTypeID()) {
    default: llvm_unreachable("Unknown return type for function call!");
    case Type::IntegerTyID: {
      unsigned BitWidth = cast<IntegerType>(RetTy)->getBitWidth();
      if (BitWidth == 1)
        rv.IntVal = APInt(BitWidth, ((bool(*)())(intptr_t)FPtr)());
      else if (BitWidth <= 8)
        rv.IntVal = APInt(BitWidth, ((char(*)())(intptr_t)FPtr)());
      else if (BitWidth <= 16)
        rv.IntVal = APInt(BitWidth, ((short(*)())(intptr_t)FPtr)());
      else if (BitWidth <= 32)
        rv.IntVal = APInt(BitWidth, ((int(*)())(intptr_t)FPtr)());
      else if (BitWidth <= 64)
        rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)());
      else
        llvm_unreachable("Integer types > 64 bits not supported");
      return rv;
    }
    case Type::VoidTyID:
      rv.IntVal = APInt(32, ((int(*)())(intptr_t)FPtr)());
      return rv;
    case Type::FloatTyID:
      rv.FloatVal = ((float(*)())(intptr_t)FPtr)();
      return rv;
    case Type::DoubleTyID:
      rv.DoubleVal = ((double(*)())(intptr_t)FPtr)();
      return rv;
    case Type::X86_FP80TyID:
    case Type::FP128TyID:
    case Type::PPC_FP128TyID:
      llvm_unreachable("long double not supported yet");
      return rv;
    case Type::PointerTyID:
      return PTOGV(((void*(*)())(intptr_t)FPtr)());
    }
  }

  // Okay, this is not one of our quick and easy cases. Because we don't have a
  // full FFI, we have to codegen a nullary stub function that just calls the
  // function we are interested in, passing in constants for all of the
  // arguments. Make this function and return.

  // First, create the function.
  FunctionType *STy = FunctionType::get(RetTy, false);
  Function *Stub = Function::Create(STy, Function::InternalLinkage, "",
                                    F->getParent());

  // Insert a basic block.
  BasicBlock *StubBB = BasicBlock::Create(F->getContext(), "", Stub);

  // Convert all of the GenericValue arguments over to constants. Note that we
  // currently don't support varargs.
  SmallVector<Value*, 8> Args;
  for (unsigned i = 0, e = ArgValues.size(); i != e; ++i) {
    Constant *C = 0;
    const Type *ArgTy = FTy->getParamType(i);
    const GenericValue &AV = ArgValues[i];
    switch (ArgTy->getTypeID()) {
    default: llvm_unreachable("Unknown argument type for function call!");
    case Type::IntegerTyID:
      C = ConstantInt::get(F->getContext(), AV.IntVal);
      break;
    case Type::FloatTyID:
      C = ConstantFP::get(F->getContext(), APFloat(AV.FloatVal));
      break;
    case Type::DoubleTyID:
      C = ConstantFP::get(F->getContext(), APFloat(AV.DoubleVal));
      break;
    case Type::PPC_FP128TyID:
    case Type::X86_FP80TyID:
    case Type::FP128TyID:
      C = ConstantFP::get(F->getContext(), APFloat(AV.IntVal));
      break;
    case Type::PointerTyID:
      void *ArgPtr = GVTOP(AV);
      if (sizeof(void*) == 4)
        C = ConstantInt::get(Type::getInt32Ty(F->getContext()),
                             (int)(intptr_t)ArgPtr);
      else
        C = ConstantInt::get(Type::getInt64Ty(F->getContext()),
                             (intptr_t)ArgPtr);
      // Cast the integer to pointer
      C = ConstantExpr::getIntToPtr(C, ArgTy);
      break;
    }
    Args.push_back(C);
  }

  CallInst *TheCall = CallInst::Create(F, Args.begin(), Args.end(),
                                       "", StubBB);
  TheCall->setCallingConv(F->getCallingConv());
  TheCall->setTailCall();
  if (!TheCall->getType()->isVoidTy())
    // Return result of the call.
    ReturnInst::Create(F->getContext(), TheCall, StubBB);
  else
    // Just return void.
    ReturnInst::Create(F->getContext(), StubBB);

  // Finally, call our nullary stub function.
  GenericValue Result = runFunction(Stub, std::vector<GenericValue>());
  // Erase it, since no other function can have a reference to it.
  Stub->eraseFromParent();
  // And return the result.
  return Result;
}
bool ObjCARCContract::tryToPeepholeInstruction(
    Function &F, Instruction *Inst, inst_iterator &Iter,
    SmallPtrSetImpl<Instruction *> &DependingInsts,
    SmallPtrSetImpl<const BasicBlock *> &Visited,
    bool &TailOkForStoreStrongs) {
  // Only these library routines return their argument. In particular,
  // objc_retainBlock does not necessarily return its argument.
  ARCInstKind Class = GetBasicARCInstKind(Inst);
  switch (Class) {
  case ARCInstKind::FusedRetainAutorelease:
  case ARCInstKind::FusedRetainAutoreleaseRV:
    return false;
  case ARCInstKind::Autorelease:
  case ARCInstKind::AutoreleaseRV:
    return contractAutorelease(F, Inst, Class, DependingInsts, Visited);
  case ARCInstKind::Retain:
    // Attempt to convert retains to retainrvs if they are next to function
    // calls.
    if (!optimizeRetainCall(F, Inst))
      return false;
    // If we succeed in our optimization, fall through.
    // FALLTHROUGH
  case ARCInstKind::RetainRV: {
    // If we're compiling for a target which needs a special inline-asm
    // marker to do the retainAutoreleasedReturnValue optimization,
    // insert it now.
    if (!RetainRVMarker)
      return false;
    BasicBlock::iterator BBI = Inst;
    BasicBlock *InstParent = Inst->getParent();

    // Step up to see if the call immediately precedes the RetainRV call.
    // If it's an invoke, we have to cross a block boundary. And we have
    // to carefully dodge no-op instructions.
    do {
      if (&*BBI == InstParent->begin()) {
        BasicBlock *Pred = InstParent->getSinglePredecessor();
        if (!Pred)
          goto decline_rv_optimization;
        BBI = Pred->getTerminator();
        break;
      }
      --BBI;
    } while (IsNoopInstruction(BBI));

    if (&*BBI == GetArgRCIdentityRoot(Inst)) {
      DEBUG(dbgs() << "Adding inline asm marker for "
                      "retainAutoreleasedReturnValue optimization.\n");
      Changed = true;
      InlineAsm *IA = InlineAsm::get(
          FunctionType::get(Type::getVoidTy(Inst->getContext()),
                            /*isVarArg=*/false),
          RetainRVMarker->getString(),
          /*Constraints=*/"", /*hasSideEffects=*/true);
      CallInst::Create(IA, "", Inst);
    }
  decline_rv_optimization:
    return false;
  }
  case ARCInstKind::InitWeak: {
    // objc_initWeak(p, null) => *p = null
    CallInst *CI = cast<CallInst>(Inst);
    if (IsNullOrUndef(CI->getArgOperand(1))) {
      Value *Null = ConstantPointerNull::get(cast<PointerType>(CI->getType()));
      Changed = true;
      new StoreInst(Null, CI->getArgOperand(0), CI);

      DEBUG(dbgs() << "OBJCARCContract: Old = " << *CI << "\n"
                   << "                 New = " << *Null << "\n");

      CI->replaceAllUsesWith(Null);
      CI->eraseFromParent();
    }
    return true;
  }
  case ARCInstKind::Release:
    // Try to form an objc store strong from our release. If we fail, there is
    // nothing further to do below, so continue.
    tryToContractReleaseIntoStoreStrong(Inst, Iter);
    return true;
  case ARCInstKind::User:
    // Be conservative if the function has any alloca instructions.
    // Technically we only care about escaping alloca instructions,
    // but this is sufficient to handle some interesting cases.
    if (isa<AllocaInst>(Inst))
      TailOkForStoreStrongs = false;
    return true;
  case ARCInstKind::IntrinsicUser:
    // Remove calls to @clang.arc.use(...).
    Inst->eraseFromParent();
    return true;
  default:
    return true;
  }
}
//
// Method: runOnModule()
//
// Description:
//  Entry point for this LLVM pass.
//  Clone functions that take LoadInsts as arguments
//
// Inputs:
//  M - A reference to the LLVM module to transform
//
// Outputs:
//  M - The transformed LLVM module.
//
// Return value:
//  true  - The module was modified.
//  false - The module was not modified.
//
bool LoadArgs::runOnModule(Module& M) {
  std::map<std::pair<Function*, const Type*>, Function*> fnCache;
  bool changed;
  do {
    changed = false;
    for (Module::iterator Func = M.begin(); Func != M.end(); ++Func) {
      for (Function::iterator B = Func->begin(), FE = Func->end(); B != FE; ++B) {
        for (BasicBlock::iterator I = B->begin(), BE = B->end(); I != BE;) {
          CallInst *CI = dyn_cast<CallInst>(I++);
          if(!CI)
            continue;

          if(CI->hasByValArgument())
            continue;
          // if the CallInst calls a function, that is externally defined,
          // or might be changed, ignore this call site.
          Function *F = CI->getCalledFunction();
          if (!F || (F->isDeclaration() || F->mayBeOverridden()))
            continue;
          if(F->hasStructRetAttr())
            continue;
          if(F->isVarArg())
            continue;

          // find the argument we must replace
          Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end();
          unsigned argNum = 0;
          for(; argNum < CI->getNumArgOperands(); argNum++, ++ai) {
            // do not care about dead arguments
            if(ai->use_empty())
              continue;
            if(F->getAttributes().getParamAttributes(argNum).hasAttrSomewhere(Attribute::SExt) ||
               F->getAttributes().getParamAttributes(argNum).hasAttrSomewhere(Attribute::ZExt))
              continue;
            if (isa<LoadInst>(CI->getArgOperand(argNum)))
              break;
          }

          // if no argument was a load instruction to be changed
          if(ai == ae)
            continue;

          LoadInst *LI = dyn_cast<LoadInst>(CI->getArgOperand(argNum));
          Instruction *InsertPt = &(Func->getEntryBlock().front());
          AllocaInst *NewVal = new AllocaInst(LI->getType(), "", InsertPt);

          StoreInst *Copy = new StoreInst(LI, NewVal);
          Copy->insertAfter(LI);
          /*if(LI->getParent() != CI->getParent())
            continue;
          // Also check that there is no store after the load.
          // TODO: Check if the load/store do not alias.
          BasicBlock::iterator bii = LI->getParent()->begin();
          Instruction *BII = bii;
          while(BII != LI) {
            ++bii;
            BII = bii;
          }
          while(BII != CI) {
            if(isa<StoreInst>(BII))
              break;
            ++bii;
            BII = bii;
          }
          if(isa<StoreInst>(bii)){
            continue;
          }*/

          // Construct the new Type
          // Appends the struct Type at the beginning
          std::vector<Type*> TP;
          for(unsigned c = 0; c < CI->getNumArgOperands(); c++) {
            if(c == argNum)
              TP.push_back(LI->getPointerOperand()->getType());
            TP.push_back(CI->getArgOperand(c)->getType());
          }

          // return type is same as that of original instruction
          FunctionType *NewFTy = FunctionType::get(CI->getType(), TP, false);
          numSimplified++;
          //if(numSimplified > 1000)
            //return true;

          Function *NewF;
          std::map<std::pair<Function*, const Type*>, Function*>::iterator Test;
          Test = fnCache.find(std::make_pair(F, NewFTy));
          if(Test != fnCache.end()) {
            NewF = Test->second;
          } else {
            NewF = Function::Create(NewFTy,
                                    GlobalValue::InternalLinkage,
                                    F->getName().str() + ".TEST",
                                    &M);
            fnCache[std::make_pair(F, NewFTy)] = NewF;

            Function::arg_iterator NI = NewF->arg_begin();
            ValueToValueMapTy ValueMap;

            unsigned count = 0;
            for (Function::arg_iterator II = F->arg_begin();
                 NI != NewF->arg_end(); ++count, ++NI) {
              if(count == argNum) {
                NI->setName("LDarg");
                continue;
              }
              ValueMap[II] = NI;
              NI->setName(II->getName());
              NI->addAttr(F->getAttributes().getParamAttributes(II->getArgNo() + 1));
              ++II;
            }

            // Perform the cloning.
            SmallVector<ReturnInst*, 100> Returns;
            CloneFunctionInto(NewF, F, ValueMap, false, Returns);

            std::vector<Value*> fargs;
            for(Function::arg_iterator ai = NewF->arg_begin(), ae = NewF->arg_end();
                ai != ae; ++ai) {
              fargs.push_back(ai);
            }

            NewF->setAttributes(NewF->getAttributes().addAttributes(
                F->getContext(), 0, F->getAttributes().getRetAttributes()));
            NewF->setAttributes(NewF->getAttributes().addAttributes(
                F->getContext(), ~0, F->getAttributes().getFnAttributes()));

            // Get the point to insert the new load instruction.
            Instruction *InsertPoint;
            for (BasicBlock::iterator insrt = NewF->front().begin();
                 isa<AllocaInst>(InsertPoint = insrt); ++insrt) { ; }

            LoadInst *LI_new = new LoadInst(fargs.at(argNum), "", InsertPoint);
            fargs.at(argNum + 1)->replaceAllUsesWith(LI_new);
          }

          // this does not seem to be a good idea
          AttributeSet NewCallPAL = AttributeSet();

          // Get the initial attributes of the call
          AttributeSet CallPAL = CI->getAttributes();
          AttributeSet RAttrs = CallPAL.getRetAttributes();
          AttributeSet FnAttrs = CallPAL.getFnAttributes();

          if (!RAttrs.isEmpty())
            NewCallPAL = NewCallPAL.addAttributes(F->getContext(), 0, RAttrs);

          SmallVector<Value*, 8> Args;
          for(unsigned j = 0; j < CI->getNumArgOperands(); j++) {
            if(j == argNum) {
              Args.push_back(NewVal);
            }
            Args.push_back(CI->getArgOperand(j));
            // position in the NewCallPAL
            AttributeSet Attrs = CallPAL.getParamAttributes(j + 1);
            if (!Attrs.isEmpty())
              NewCallPAL = NewCallPAL.addAttributes(F->getContext(),
                                                    Args.size(), Attrs);
          }

          // Create the new attributes vec.
          if (!FnAttrs.isEmpty())
            NewCallPAL = NewCallPAL.addAttributes(F->getContext(), ~0, FnAttrs);

          CallInst *CallI = CallInst::Create(NewF, Args, "", CI);
          CallI->setCallingConv(CI->getCallingConv());
          CallI->setAttributes(NewCallPAL);
          CI->replaceAllUsesWith(CallI);
          CI->eraseFromParent();
          changed = true;
        }
      }
    }
  } while(changed);
  return true;
}
// =============================================================================
// andOOPIsGone (formerly: createProcess)
//
// Formerly, OOP permitted the same SC_{METHOD,THREAD} functions to apply
// to each copy of a SC_MODULE. Aaaaand it's gone !
// (but OTOH we enable better optimizations)
// Creates a new C-style function that calls the old member function with the
// given sc_module. The call is then inlined.
// FIXME: assumes the method is non-virtual and that sc_module is the first
// inherited class of the SC_MODULE
// =============================================================================
Function *TwetoPassImpl::andOOPIsGone(Function *oldProc,
                                      sc_core::sc_module *initiatorMod) {
  if (!oldProc)
    return NULL;

  // can't statically optimize if the address of the module isn't predictable
  // TODO: also handle already-static variables, which also have
  // fixed $pc-relative addresses
  if (staticopt == optlevel && !permalloc::is_from(initiatorMod))
    return NULL;

  LLVMContext &context = getGlobalContext();
  FunctionType *funType = oldProc->getFunctionType();
  Type *type = funType->getParamType(0);
  FunctionType *newProcType =
      FunctionType::get(oldProc->getReturnType(), ArrayRef<Type *>(), false);

  // Create the new function
  std::ostringstream id;
  id << proc_counter++;
  std::string name =
      oldProc->getName().str() + std::string("_clone_") + id.str();
  Function *newProc = Function::Create(newProcType, Function::ExternalLinkage,
                                       name, this->llvmMod);
  assert(newProc->empty());
  newProc->addFnAttr(Attribute::InlineHint);

  // Create call to old function
  BasicBlock *bb = BasicBlock::Create(context, "entry", newProc);
  IRBuilder<> *irb = new IRBuilder<>(context);
  irb->SetInsertPoint(bb);

  Value *thisAddr = createRelocatablePointer(type, initiatorMod, irb);

  CallInst *ci = irb->CreateCall(
      oldProc, ArrayRef<Value *>(std::vector<Value *>(1, thisAddr)));
  //bb->getInstList().insert(ci, thisAddr);
  if (ci->getType()->isVoidTy())
    irb->CreateRetVoid();
  else
    irb->CreateRet(ci);

  // The function should be valid now
  verifyFunction(*newProc);

  { // Inline the call
    DataLayout *td = new DataLayout(this->llvmMod);
    InlineFunctionInfo i(NULL, td);
    bool success = InlineFunction(ci, i);
    assert(success);
    verifyFunction(*newProc);
  }

  // further optimize the function
  inlineBasicIO(initiatorMod, newProc);

  newProc->dump();
  return newProc;
}
bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
    return false;

  auto &TPC = getAnalysis<TargetPassConfig>();

  const TargetMachine &TM = TPC.getTM<TargetMachine>();
  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
  LLVMContext &Ctx = F.getParent()->getContext();
  const DataLayout &DL = F.getParent()->getDataLayout();
  BasicBlock &EntryBlock = *F.begin();
  IRBuilder<> Builder(&*EntryBlock.begin());

  const unsigned KernArgBaseAlign = 16; // FIXME: Increase if necessary
  const uint64_t BaseOffset = ST.getExplicitKernelArgOffset(F);

  unsigned MaxAlign;
  // FIXME: Alignment is broken with explicit arg offset.
  const uint64_t TotalKernArgSize = ST.getKernArgSegmentSize(F, MaxAlign);
  if (TotalKernArgSize == 0)
    return false;

  CallInst *KernArgSegment =
      Builder.CreateIntrinsic(Intrinsic::amdgcn_kernarg_segment_ptr, {}, {},
                              nullptr, F.getName() + ".kernarg.segment");

  KernArgSegment->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
  KernArgSegment->addAttribute(
      AttributeList::ReturnIndex,
      Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize));

  unsigned AS = KernArgSegment->getType()->getPointerAddressSpace();
  uint64_t ExplicitArgOffset = 0;

  for (Argument &Arg : F.args()) {
    Type *ArgTy = Arg.getType();
    unsigned Align = DL.getABITypeAlignment(ArgTy);
    unsigned Size = DL.getTypeSizeInBits(ArgTy);
    unsigned AllocSize = DL.getTypeAllocSize(ArgTy);

    uint64_t EltOffset = alignTo(ExplicitArgOffset, Align) + BaseOffset;
    ExplicitArgOffset = alignTo(ExplicitArgOffset, Align) + AllocSize;

    if (Arg.use_empty())
      continue;

    if (PointerType *PT = dyn_cast<PointerType>(ArgTy)) {
      // FIXME: Hack. We rely on AssertZext to be able to fold DS addressing
      // modes on SI to know the high bits are 0 so pointer adds don't wrap. We
      // can't represent this with range metadata because it's only allowed for
      // integer types.
      if ((PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
           PT->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) &&
          ST.getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS)
        continue;

      // FIXME: We can replace this with equivalent alias.scope/noalias
      // metadata, but this appears to be a lot of work.
      if (Arg.hasNoAliasAttr())
        continue;
    }

    VectorType *VT = dyn_cast<VectorType>(ArgTy);
    bool IsV3 = VT && VT->getNumElements() == 3;

    bool DoShiftOpt = Size < 32 && !ArgTy->isAggregateType();

    VectorType *V4Ty = nullptr;

    int64_t AlignDownOffset = alignDown(EltOffset, 4);
    int64_t OffsetDiff = EltOffset - AlignDownOffset;
    unsigned AdjustedAlign = MinAlign(DoShiftOpt ? AlignDownOffset : EltOffset,
                                      KernArgBaseAlign);

    Value *ArgPtr;
    Type *AdjustedArgTy;
    if (DoShiftOpt) { // FIXME: Handle aggregate types
      // Since we don't have sub-dword scalar loads, avoid doing an extload by
      // loading earlier than the argument address, and extracting the relevant
      // bits.
      //
      // Additionally widen any sub-dword load to i32 even if suitably aligned,
      // so that CSE between different argument loads works easily.
      ArgPtr = Builder.CreateConstInBoundsGEP1_64(
          Builder.getInt8Ty(), KernArgSegment, AlignDownOffset,
          Arg.getName() + ".kernarg.offset.align.down");
      AdjustedArgTy = Builder.getInt32Ty();
    } else {
      ArgPtr = Builder.CreateConstInBoundsGEP1_64(
          Builder.getInt8Ty(), KernArgSegment, EltOffset,
          Arg.getName() + ".kernarg.offset");
      AdjustedArgTy = ArgTy;
    }

    if (IsV3 && Size >= 32) {
      V4Ty = VectorType::get(VT->getVectorElementType(), 4);
      // Use the hack that clang uses to avoid SelectionDAG ruining v3 loads
      AdjustedArgTy = V4Ty;
    }

    ArgPtr = Builder.CreateBitCast(ArgPtr, AdjustedArgTy->getPointerTo(AS),
                                   ArgPtr->getName() + ".cast");
    LoadInst *Load =
        Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign);
    Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {}));

    MDBuilder MDB(Ctx);

    if (isa<PointerType>(ArgTy)) {
      if (Arg.hasNonNullAttr())
        Load->setMetadata(LLVMContext::MD_nonnull, MDNode::get(Ctx, {}));

      uint64_t DerefBytes = Arg.getDereferenceableBytes();
      if (DerefBytes != 0) {
        Load->setMetadata(
            LLVMContext::MD_dereferenceable,
            MDNode::get(Ctx,
                        MDB.createConstant(
                            ConstantInt::get(Builder.getInt64Ty(), DerefBytes))));
      }

      uint64_t DerefOrNullBytes = Arg.getDereferenceableOrNullBytes();
      if (DerefOrNullBytes != 0) {
        Load->setMetadata(
            LLVMContext::MD_dereferenceable_or_null,
            MDNode::get(Ctx,
                        MDB.createConstant(ConstantInt::get(Builder.getInt64Ty(),
                                                            DerefOrNullBytes))));
      }

      unsigned ParamAlign = Arg.getParamAlignment();
      if (ParamAlign != 0) {
        Load->setMetadata(
            LLVMContext::MD_align,
            MDNode::get(Ctx,
                        MDB.createConstant(ConstantInt::get(Builder.getInt64Ty(),
                                                            ParamAlign))));
      }
    }

    // TODO: Convert noalias arg to !noalias

    if (DoShiftOpt) {
      Value *ExtractBits = OffsetDiff == 0 ?
        Load : Builder.CreateLShr(Load, OffsetDiff * 8);

      IntegerType *ArgIntTy = Builder.getIntNTy(Size);
      Value *Trunc = Builder.CreateTrunc(ExtractBits, ArgIntTy);
      Value *NewVal = Builder.CreateBitCast(Trunc, ArgTy,
                                            Arg.getName() + ".load");
      Arg.replaceAllUsesWith(NewVal);
    } else if (IsV3) {
      Value *Shuf = Builder.CreateShuffleVector(Load, UndefValue::get(V4Ty),
                                                {0, 1, 2},
                                                Arg.getName() + ".load");
      Arg.replaceAllUsesWith(Shuf);
    } else {
      Load->setName(Arg.getName() + ".load");
      Arg.replaceAllUsesWith(Load);
    }
  }

  KernArgSegment->addAttribute(
      AttributeList::ReturnIndex,
      Attribute::getWithAlignment(Ctx, std::max(KernArgBaseAlign, MaxAlign)));

  return true;
}
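// Scalar model of the sub-dword "DoShiftOpt" path above (illustrative only,
// not part of the pass; assumes a little-endian kernarg layout): read the
// dword-aligned 32-bit word containing the narrow argument, then shift and
// truncate, mirroring the CreateLShr/CreateTrunc sequence for an i16 argument.
#include <cstdint>
#include <cstring>
static uint16_t loadSubDwordArg(const uint8_t *kernArg, uint64_t eltOffset) {
  uint64_t alignDownOffset = eltOffset & ~uint64_t(3); // alignDown(EltOffset, 4)
  uint32_t word;
  std::memcpy(&word, kernArg + alignDownOffset, sizeof(word)); // aligned i32 load
  unsigned offsetDiff = unsigned(eltOffset - alignDownOffset); // OffsetDiff
  return uint16_t(word >> (offsetDiff * 8));                   // lshr + trunc
}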
Function *StructuredModuleEditor::wrapFunc(Function *OriginalFunc,
                                           Function *PreFunc,
                                           Function *PostFunc) {
  if (OriginalFunc == NULL)
    return NULL;

  if (PreFunc != NULL) {
    for (Function::arg_iterator I = OriginalFunc->arg_begin(),
         J = PreFunc->arg_begin(), E = OriginalFunc->arg_end();
         I != E; ++I, ++J)
      if (I->getType() != J->getType()) {
        OS << PreFunc->getName()
           << " must have the same argument types as the wrappee!\n";
        return NULL;
      }
  }

  if (PostFunc != NULL) {
    if (OriginalFunc->getReturnType()->isVoidTy()) {
      if (PostFunc->getArgumentList().size() > 0) {
        OS << PostFunc->getName()
           << " must accept no arguments because the wrappee returns void!\n";
        return NULL;
      }
    } else if (PostFunc->getArgumentList().size() != 1
               || PostFunc->getArgumentList().front().getType()
                  != OriginalFunc->getReturnType()) {
      OS << *(PostFunc->getType()) << "..."
         << *(OriginalFunc->getReturnType()) << "\n";
      OS << PostFunc->getName()
         << " must accept only one argument and that argument must be of the"
            " wrappee's return value type!\n";
      return NULL;
    }
  }

  // The wrapper copies the given function's arguments and argument types to
  // two separate vectors
  std::vector<Value*> WrapperArgs;
  std::vector<Type*> WrapperArgTypes;
  for (Function::arg_iterator I = OriginalFunc->arg_begin(),
       E = OriginalFunc->arg_end(); I != E; ++I) {
    WrapperArgTypes.push_back(I->getType());
    WrapperArgs.push_back(I);
  }

  // Creates a function which is identical to the original function except for
  // its name (will never "get" an existing function since the name is unique)
  // and inserts it into the Module. The name is guaranteed to be unique
  // because when we specify a Value's name as "", LLVM generates a unique
  // identifier for it. If we set the name later on and the name is a
  // duplicate, LLVM will also generate a unique ID. It is just important to
  // avoid specifying a duplicate name during the "getOrInsert" portion of our
  // code because we run the risk of getting something which exists instead of
  // creating something new.
  Constant *c = M->getOrInsertFunction("",
      FunctionType::get(OriginalFunc->getReturnType(), WrapperArgTypes, false),
      OriginalFunc->getAttributes());
  Function *Wrapper = cast<Function>(c);
  Wrapper->setName(OriginalFunc->getName() + "-wrapper");

  // The Wrapper function uses the same calling convention as the wrappee.
  Wrapper->setCallingConv(OriginalFunc->getCallingConv());

  // The Wrapper function uses the same parameter names as the wrappee
  for (Function::arg_iterator I = Wrapper->arg_begin(),
       J = OriginalFunc->arg_begin(), E = Wrapper->arg_end();
       I != E; ++I, ++J)
    I->setName(J->getName());

  // Inserts the Wrapper function into the CFG
  CG->getOrInsertFunction(Wrapper);

  // Replaces all references to OriginalFunc with references to Wrapper
  replaceFunc(OriginalFunc, Wrapper);

  // Constructs a basic block in the following sequence:
  // 1) If a pre-function-invocation function is given, creates a call to that
  //    function with the same arguments passed to the wrapped function
  // 2) Unconditionally creates a call to the function we are wrapping
  //    with the same arguments passed to the wrapped function
  // 3) If a post-function-invocation function is given, creates a call to that
  //    function with the same return value of the wrapped function
  // 4) Returns the value produced by the call to the wrapped function
  BasicBlock *EntryBlock = BasicBlock::Create(getGlobalContext(), "entry",
                                              Wrapper);
  IRBuilder<> builder(EntryBlock);

  if (PreFunc != NULL) {
    CallInst *PrologueCall = builder.CreateCall(PreFunc, WrapperArgs);
    CallSite CS(PrologueCall);
    (*CG)[Wrapper]->addCalledFunction(CS, (*CG)[PreFunc]);
  }

  CallInst *OriginalCall = builder.CreateCall(OriginalFunc, WrapperArgs);
  CallSite CS(OriginalCall);
  (*CG)[Wrapper]->addCalledFunction(CS, (*CG)[OriginalFunc]);

  if (PostFunc != NULL) {
    CallInst *EpilogueCall;
    if (OriginalCall->getType()->isVoidTy())
      EpilogueCall = builder.CreateCall(PostFunc);
    else
      EpilogueCall = builder.CreateCall(PostFunc, OriginalCall);
    CallSite CS(EpilogueCall);
    (*CG)[Wrapper]->addCalledFunction(CS, (*CG)[PostFunc]);
  }

  if (OriginalCall->getType()->isVoidTy())
    builder.CreateRetVoid();
  else
    builder.CreateRet(OriginalCall);

  // Returns the Wrapper function we have created
  return Wrapper;
}
bool GambasPass::runOnFunction(Function &F) {
  IRBuilder<> Builder(F.getContext());

  bool changed = false;
  for(Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
    for(BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
      ICmpInst* ICI = dyn_cast<ICmpInst>(I);
      CallInst* CI = dyn_cast<CallInst>(I++);

      if (ICI && ICI->hasMetadata() && ICI->getMetadata("unref_slt") &&
          dyn_cast<LoadInst>(ICI->getOperand(0))) {
        ICI->replaceAllUsesWith(ConstantInt::get(ICI->getType(), false));
        ICI->eraseFromParent();
        changed = true;
        continue;
      }

      if (!CI)
        continue;

      Function* callee = CI->getCalledFunction();
      if (callee == NULL || !callee->isDeclaration())
        continue;

      StringRef name = callee->getName();
      if (name == "JR_release_variant" || name == "JR_borrow_variant") {
        ConstantInt* vtype_int = dyn_cast<ConstantInt>(CI->getArgOperand(0));
        if (!vtype_int)
          continue;

        uint64_t vtype = vtype_int->getZExtValue();
        if (TYPE_is_string(vtype) || TYPE_is_object(vtype))
          continue;

        CI->eraseFromParent();
        changed = true;
      }
      else if (name == FUNCTION_NAME(__finite)) {
        ConstantFP* op = dyn_cast<ConstantFP>(CI->getArgOperand(0));
        if (!op)
          continue;

        int val = __finite(op->getValueAPF().convertToDouble());
        Constant* res = ConstantInt::get(CI->getType(), val);
        CI->replaceAllUsesWith(res);
        CI->eraseFromParent();
        changed = true;
      }
      else if (name == FUNCTION_NAME(__isnan)) {
        ConstantFP* op = dyn_cast<ConstantFP>(CI->getArgOperand(0));
        if (!op)
          continue;

        int val = __isnan(op->getValueAPF().convertToDouble());
        Constant* res = ConstantInt::get(CI->getType(), val);
        CI->replaceAllUsesWith(res);
        CI->eraseFromParent();
        changed = true;
      }
      else if (name == FUNCTION_NAME(__isinf)) {
        ConstantFP* op = dyn_cast<ConstantFP>(CI->getArgOperand(0));
        if (!op)
          continue;

        int val = __isinf(op->getValueAPF().convertToDouble());
        Constant* res = ConstantInt::get(CI->getType(), val);
        CI->replaceAllUsesWith(res);
        CI->eraseFromParent();
        changed = true;
      }
    }
  }
  return changed;
}
//
// Method: runOnModule()
//
// Description:
//  Entry point for this LLVM pass.
//  Search for all call sites to casted functions.
//  Check if they only differ in an argument type
//  Cast the argument, and call the original function
//
// Inputs:
//  M - A reference to the LLVM module to transform
//
// Outputs:
//  M - The transformed LLVM module.
//
// Return value:
//  true  - The module was modified.
//  false - The module was not modified.
//
bool ArgCast::runOnModule(Module& M) {
  std::vector<CallInst*> worklist;
  for (Module::iterator I = M.begin(); I != M.end(); ++I) {
    if (I->mayBeOverridden())
      continue;
    // Find all uses of this function
    for(Value::user_iterator ui = I->user_begin(), ue = I->user_end();
        ui != ue; ) {
      // check if it is ever casted to a different function type
      ConstantExpr *CE = dyn_cast<ConstantExpr>(*ui++);
      if(!CE)
        continue;
      if (CE->getOpcode() != Instruction::BitCast)
        continue;
      if(CE->getOperand(0) != I)
        continue;
      const PointerType *PTy = dyn_cast<PointerType>(CE->getType());
      if (!PTy)
        continue;
      const Type *ETy = PTy->getElementType();
      const FunctionType *FTy = dyn_cast<FunctionType>(ETy);
      if(!FTy)
        continue;
      // casting to a varargs function
      // or function with same number of arguments
      // possibly varying types of arguments
      if(FTy->getNumParams() != I->arg_size() && !FTy->isVarArg())
        continue;
      for(Value::user_iterator uii = CE->user_begin(), uee = CE->user_end();
          uii != uee; ++uii) {
        // Find all uses of the casted value, and check if it is
        // used in a Call Instruction
        if (CallInst* CI = dyn_cast<CallInst>(*uii)) {
          // Check that it is the called value, and not an argument
          if(CI->getCalledValue() != CE)
            continue;
          // Check that the number of arguments passed, and expected
          // by the function are the same.
          if(!I->isVarArg()) {
            if(CI->getNumOperands() != I->arg_size() + 1)
              continue;
          } else {
            if(CI->getNumOperands() < I->arg_size() + 1)
              continue;
          }
          // If so, add to worklist
          worklist.push_back(CI);
        }
      }
    }
  }

  // Process the worklist of potential call sites to transform
  while(!worklist.empty()) {
    CallInst *CI = worklist.back();
    worklist.pop_back();
    // Get the called Function
    Function *F = cast<Function>(CI->getCalledValue()->stripPointerCasts());
    const FunctionType *FTy = F->getFunctionType();

    SmallVector<Value*, 8> Args;
    unsigned i = 0;
    for(i = 0; i < FTy->getNumParams(); ++i) {
      Type *ArgType = CI->getOperand(i+1)->getType();
      Type *FormalType = FTy->getParamType(i);
      // If the types for this argument match, just add it to the
      // parameter list. No cast needs to be inserted.
      if(ArgType == FormalType) {
        Args.push_back(CI->getOperand(i+1));
      }
      else if(ArgType->isPointerTy() && FormalType->isPointerTy()) {
        CastInst *CastI = CastInst::CreatePointerCast(CI->getOperand(i+1),
                                                      FormalType, "", CI);
        Args.push_back(CastI);
      }
      else if (ArgType->isIntegerTy() && FormalType->isIntegerTy()) {
        unsigned SrcBits = ArgType->getScalarSizeInBits();
        unsigned DstBits = FormalType->getScalarSizeInBits();
        if(SrcBits > DstBits) {
          CastInst *CastI = CastInst::CreateIntegerCast(CI->getOperand(i+1),
                                                        FormalType, true,
                                                        "", CI);
          Args.push_back(CastI);
        } else {
          if (F->getAttributes().hasAttribute(i+1, Attribute::SExt)) {
            CastInst *CastI = CastInst::CreateIntegerCast(CI->getOperand(i+1),
                                                          FormalType, true,
                                                          "", CI);
            Args.push_back(CastI);
          } else if (F->getAttributes().hasAttribute(i+1, Attribute::ZExt)) {
            CastInst *CastI = CastInst::CreateIntegerCast(CI->getOperand(i+1),
                                                          FormalType, false,
                                                          "", CI);
            Args.push_back(CastI);
          } else {
            // Use ZExt in default case.
            // Derived from InstCombine. Also, the only reason this should
            // happen is mismatched prototypes.
            // Seen in case of integer constants which get interpreted as i32,
            // even if being used as i64.
            // TODO: is this correct?
            CastInst *CastI = CastInst::CreateIntegerCast(CI->getOperand(i+1),
                                                          FormalType, false,
                                                          "", CI);
            Args.push_back(CastI);
          }
        }
      } else {
        DEBUG(ArgType->dump());
        DEBUG(FormalType->dump());
        break;
      }
    }

    // If we found an argument we could not cast, try the next instruction
    if(i != FTy->getNumParams()) {
      continue;
    }

    if(FTy->isVarArg()) {
      for(; i < CI->getNumOperands() - 1; i++) {
        Args.push_back(CI->getOperand(i+1));
      }
    }

    // else replace the call instruction
    CallInst *CINew = CallInst::Create(F, Args, "", CI);
    CINew->setCallingConv(CI->getCallingConv());
    CINew->setAttributes(CI->getAttributes());
    if(!CI->use_empty()) {
      CastInst *RetCast;
      if(CI->getType() != CINew->getType()) {
        if(CI->getType()->isPointerTy() && CINew->getType()->isPointerTy())
          RetCast = CastInst::CreatePointerCast(CINew, CI->getType(), "", CI);
        else if(CI->getType()->isIntOrIntVectorTy() &&
                CINew->getType()->isIntOrIntVectorTy())
          RetCast = CastInst::CreateIntegerCast(CINew, CI->getType(), false,
                                                "", CI);
        else if(CI->getType()->isIntOrIntVectorTy() &&
                CINew->getType()->isPointerTy())
          RetCast = CastInst::CreatePointerCast(CINew, CI->getType(), "", CI);
        else if(CI->getType()->isPointerTy() &&
                CINew->getType()->isIntOrIntVectorTy())
          RetCast = new IntToPtrInst(CINew, CI->getType(), "", CI);
        else {
          // TODO: I'm not sure what right behavior is here, but this case
          // should be handled.
          llvm_unreachable("Unexpected type conversion in call!");
          abort();
        }
        CI->replaceAllUsesWith(RetCast);
      } else {
        CI->replaceAllUsesWith(CINew);
      }
    }

    // Debug printing
    DEBUG(errs() << "ARGCAST:");
    DEBUG(errs() << "ERASE:");
    DEBUG(CI->dump());
    DEBUG(errs() << "ARGCAST:");
    DEBUG(errs() << "ADDED:");
    DEBUG(CINew->dump());

    CI->eraseFromParent();
    numChanged++;
  }
  return true;
}
//
// Method: runOnModule()
//
// Description:
//  Entry point for this LLVM pass.
//  Clone functions that take GEPs as arguments
//
// Inputs:
//  M - A reference to the LLVM module to transform
//
// Outputs:
//  M - The transformed LLVM module.
//
// Return value:
//  true  - The module was modified.
//  false - The module was not modified.
//
bool GEPExprArgs::runOnModule(Module& M) {
  bool changed;
  do {
    changed = false;
    for (Module::iterator F = M.begin(); F != M.end(); ++F) {
      for (Function::iterator B = F->begin(), FE = F->end(); B != FE; ++B) {
        for (BasicBlock::iterator I = B->begin(), BE = B->end(); I != BE;) {
          CallInst *CI = dyn_cast<CallInst>(I++);
          if(!CI)
            continue;

          if(CI->hasByValArgument())
            continue;
          // if the CallInst calls a function, that is externally defined,
          // or might be changed, ignore this call site.
          Function *F = CI->getCalledFunction();
          if (!F || (F->isDeclaration() || F->mayBeOverridden()))
            continue;
          if(F->hasStructRetAttr())
            continue;
          if(F->isVarArg())
            continue;

          // find the argument we must replace
          Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end();
          unsigned argNum = 1;
          for(; argNum < CI->getNumOperands(); argNum++, ++ai) {
            if(ai->use_empty())
              continue;
            if (isa<GEPOperator>(CI->getOperand(argNum)))
              break;
          }

          // if no argument was a GEP operator to be changed
          if(ai == ae)
            continue;

          GEPOperator *GEP = dyn_cast<GEPOperator>(CI->getOperand(argNum));
          if(!GEP->hasAllConstantIndices())
            continue;

          // Construct the new Type
          // Appends the struct Type at the beginning
          std::vector<Type*> TP;
          TP.push_back(GEP->getPointerOperand()->getType());
          for(unsigned c = 1; c < CI->getNumOperands(); c++) {
            TP.push_back(CI->getOperand(c)->getType());
          }

          // return type is same as that of original instruction
          FunctionType *NewFTy = FunctionType::get(CI->getType(), TP, false);
          Function *NewF;
          numSimplified++;
          if(numSimplified > 800)
            return true;

          NewF = Function::Create(NewFTy,
                                  GlobalValue::InternalLinkage,
                                  F->getName().str() + ".TEST",
                                  &M);

          Function::arg_iterator NI = NewF->arg_begin();
          NI->setName("GEParg");
          ++NI;

          ValueToValueMapTy ValueMap;

          for (Function::arg_iterator II = F->arg_begin();
               NI != NewF->arg_end(); ++II, ++NI) {
            ValueMap[II] = NI;
            NI->setName(II->getName());
            NI->addAttr(F->getAttributes().getParamAttributes(II->getArgNo() + 1));
          }
          NewF->setAttributes(NewF->getAttributes().addAttr(
              0, F->getAttributes().getRetAttributes()));

          // Perform the cloning.
          SmallVector<ReturnInst*, 100> Returns;
          CloneFunctionInto(NewF, F, ValueMap, false, Returns);

          std::vector<Value*> fargs;
          for(Function::arg_iterator ai = NewF->arg_begin(), ae = NewF->arg_end();
              ai != ae; ++ai) {
            fargs.push_back(ai);
          }

          NewF->setAttributes(NewF->getAttributes().addAttr(
              ~0, F->getAttributes().getFnAttributes()));

          // Get the point to insert the GEP instr.
          SmallVector<Value*, 8> Ops(CI->op_begin()+1, CI->op_end());
          Instruction *InsertPoint;
          for (BasicBlock::iterator insrt = NewF->front().begin();
               isa<AllocaInst>(InsertPoint = insrt); ++insrt) { ; }

          NI = NewF->arg_begin();
          SmallVector<Value*, 8> Indices;
          Indices.append(GEP->op_begin()+1, GEP->op_end());
          GetElementPtrInst *GEP_new = GetElementPtrInst::Create(cast<Value>(NI),
                                                                 Indices, "",
                                                                 InsertPoint);
          fargs.at(argNum)->replaceAllUsesWith(GEP_new);
          unsigned j = argNum + 1;
          for(; j < CI->getNumOperands(); j++) {
            if(CI->getOperand(j) == GEP)
              fargs.at(j)->replaceAllUsesWith(GEP_new);
          }

          SmallVector<AttributeWithIndex, 8> AttributesVec;

          // Get the initial attributes of the call
          AttrListPtr CallPAL = CI->getAttributes();
          Attributes RAttrs = CallPAL.getRetAttributes();
          Attributes FnAttrs = CallPAL.getFnAttributes();

          if (RAttrs)
            AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs));

          SmallVector<Value*, 8> Args;
          Args.push_back(GEP->getPointerOperand());
          for(unsigned j = 1; j < CI->getNumOperands(); j++) {
            Args.push_back(CI->getOperand(j));
            // position in the AttributesVec
            if (Attributes Attrs = CallPAL.getParamAttributes(j))
              AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
          }

          // Create the new attributes vec.
          if (FnAttrs != Attribute::None)
            AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));

          AttrListPtr NewCallPAL = AttrListPtr::get(AttributesVec.begin(),
                                                    AttributesVec.end());

          CallInst *CallI = CallInst::Create(NewF, Args, "", CI);
          CallI->setCallingConv(CI->getCallingConv());
          CallI->setAttributes(NewCallPAL);
          CI->replaceAllUsesWith(CallI);
          CI->eraseFromParent();
          changed = true;
        }
      }
    }
  } while(changed);
  return true;
}
// Replace "packW" and "unpackW" intrinsics by insert/extract operations and // update the uses accordingly. void FunctionVectorizer::generatePackUnpackCode(Function* f, const WFVInfo& info) { assert (f); SmallVector<CallInst*, 16> eraseVec; for (auto &BB : *f) { Instruction* allocPos = BB.getFirstInsertionPt(); for (auto &I : BB) { Instruction* inst = &I; if (isUnpackWFunctionCall(inst)) { DEBUG_WFV( outs() << "generateUnpackCode(" << *inst << " )\n"; ); CallInst* unpackCall = cast<CallInst>(inst); Value* value = unpackCall->getArgOperand(0); Value* indexVal = unpackCall->getArgOperand(1); // Extract scalar values. Value* extract = generateHorizontalExtract(value, indexVal, unpackCall->getName(), allocPos, unpackCall, info); // If the type only matches structurally, create an additional bitcast. Type* oldType = unpackCall->getType(); Type* newType = extract->getType(); if (oldType != newType) { assert (newType->canLosslesslyBitCastTo(oldType) || WFV::typesMatch(oldType, newType)); Instruction* bc = new BitCastInst(extract, oldType, "", unpackCall); // Copy properties from unpackCall. WFV::copyMetadata(bc, *unpackCall); extract = bc; } // Rewire the use. assert (unpackCall->getNumUses() == 1); Value* use = *unpackCall->use_begin(); assert (isa<Instruction>(use)); Instruction* scalarUse = cast<Instruction>(use); scalarUse->replaceUsesOfWith(unpackCall, extract); // Erase now unused unpack call. eraseVec.push_back(unpackCall); // If the returned extract operation is an alloca, we have to // make sure that all changes to that memory location are // correctly written back to the original memory from which // the sub-element was extracted. // This means we have to insert merge and store operations // after every use of this value (including "forwarded" uses // via casts, phis, and GEPs). // However, we must only merge back those values that were // modified. This is not only for efficiency, but also for // correctness, since there may be uninitialized pointers in // a structure, which we must not load/store from/to (see // test_struct_extra05 with all analyses disabled). if (isa<AllocaInst>(extract) || (isa<BitCastInst>(extract) && isa<AllocaInst>(cast<BitCastInst>(extract)->getOperand(0)))) { generateWriteBackOperations(cast<Instruction>(extract), cast<Instruction>(extract), value, indexVal, info); } } else if (isPackWFunctionCall(inst)) { DEBUG_WFV( outs() << "generatePackCode(" << *inst << " )\n"; ); CallInst* packCall = cast<CallInst>(inst); assert (WFV::isVectorizedType(*packCall->getType()) && "packCall should have vector return type after inst vectorization!"); SmallVector<Value*, 8> scalarVals(info.mVectorizationFactor); // Get scalar results for merge. for (unsigned i=0; i<info.mVectorizationFactor; ++i) { scalarVals[i] = packCall->getArgOperand(i); } // Merge scalar results. Instruction* merge = generateHorizontalMerge(scalarVals, packCall->getType(), "", packCall, info); // Rewire the uses. packCall->replaceAllUsesWith(merge); // Copy properties from packCall. WFV::copyMetadata(merge, *packCall); // Erase now unused pack call. eraseVec.push_back(packCall); }
// =============================================================================
// createProcess
//
// Create a new function that contains a call to the old function.
// We inline the call in order to clone the old function's implementation.
// =============================================================================
Function *TLMBasicPassImpl::createProcess(Function *oldProc,
                                          sc_core::sc_module *initiatorMod) {
  LLVMContext &context = getGlobalContext();
  IntegerType *intType;
  if (this->is64Bit) {
    intType = Type::getInt64Ty(context);
  } else {
    intType = Type::getInt32Ty(context);
  }

  // Retrieve a pointer to the initiator module
  ConstantInt *initiatorModVal =
      ConstantInt::getSigned(intType, reinterpret_cast<intptr_t>(initiatorMod));
  FunctionType *funType = oldProc->getFunctionType();
  Type *type = funType->getParamType(0);
  IntToPtrInst *thisAddr = new IntToPtrInst(initiatorModVal, type, "");

  // Compute the type of the new function
  FunctionType *oldProcType = oldProc->getFunctionType();
  Value **argsBegin = new Value*[1];
  Value **argsEnd = argsBegin;
  *argsEnd++ = thisAddr;
  const unsigned argsSize = argsEnd - argsBegin;
  Value **args = argsBegin;
  assert(oldProcType->getNumParams() == argsSize);
  assert(!oldProc->isDeclaration());
  std::vector<Type*> argTypes;
  for (unsigned i = 0; i != argsSize; ++i)
    argTypes.push_back(oldProcType->getParamType(i));
  FunctionType *newProcType =
      FunctionType::get(oldProc->getReturnType(), ArrayRef<Type*>(argTypes),
                        false);

  // Create the new function
  std::ostringstream id;
  id << proc_counter++;
  std::string name =
      oldProc->getName().str() + std::string("_clone_") + id.str();
  Function *newProc = Function::Create(newProcType, Function::ExternalLinkage,
                                       name, this->llvmMod);
  assert(newProc->empty());
  newProc->addFnAttr(Attributes::InlineHint);

  { // Set name of newfunc arguments and complete args
    Function::arg_iterator nai = newProc->arg_begin();
    Function::arg_iterator oai = oldProc->arg_begin();
    for (unsigned i = 0; i != argsSize; ++i, ++oai) {
      nai->setName(oai->getName());
      args[i] = nai;
      ++nai;
    }
    assert(nai == newProc->arg_end());
    assert(oai == oldProc->arg_end());
  }

  // Create call to old function
  BasicBlock *bb = BasicBlock::Create(context, "entry", newProc);
  IRBuilder<> *irb = new IRBuilder<>(context);
  irb->SetInsertPoint(bb);
  CallInst *ci = irb->CreateCall(oldProc, ArrayRef<Value*>(argsBegin, argsEnd));
  bb->getInstList().insert(ci, thisAddr);
  if (ci->getType()->isVoidTy())
    irb->CreateRetVoid();
  else
    irb->CreateRet(ci);

  // The function should be valid now
  verifyFunction(*newProc);

  { // Inline the call
    DataLayout *td = new DataLayout(this->llvmMod);
    InlineFunctionInfo i(NULL, td);
    bool success = InlineFunction(ci, i);
    assert(success);
    verifyFunction(*newProc);
  }

  //newProc->dump();
  return newProc;
}
bool NVVMReflect::handleFunction(Function *ReflectFunction) {
  // Validate the _reflect function.
  assert(ReflectFunction->isDeclaration() &&
         "_reflect function should not have a body");
  assert(ReflectFunction->getReturnType()->isIntegerTy() &&
         "_reflect's return type should be integer");

  std::vector<Instruction *> ToRemove;

  // Go through the uses of ReflectFunction in this Function. Each of them
  // should be a CallInst with a ConstantArray argument. First validate that.
  // If the c-string corresponding to the ConstantArray can be found
  // successfully, see if it can be found in VarMap. If so, replace the uses
  // of the CallInst with the value found in VarMap. If not, replace the use
  // with value 0.
  //
  // The IR for __nvvm_reflect calls differs between CUDA versions.
  //
  // CUDA 6.5 and earlier uses this sequence:
  //    %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8
  //        (i8 addrspace(4)* getelementptr inbounds
  //           ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
  //    %reflect = tail call i32 @__nvvm_reflect(i8* %ptr)
  //
  // The value returned by Sym->getOperand(0) is a Constant with a
  // ConstantDataSequential operand which can be converted to string and used
  // for lookup.
  //
  // CUDA 7.0 does it slightly differently:
  //   %reflect = call i32 @__nvvm_reflect(i8* addrspacecast
  //       (i8 addrspace(1)* getelementptr inbounds
  //           ([8 x i8], [8 x i8] addrspace(1)* @str, i32 0, i32 0) to i8*))
  //
  // In this case, we get a Constant with a GlobalVariable operand and we need
  // to dig deeper to find its initializer with the string we'll use for
  // lookup.
  for (User *U : ReflectFunction->users()) {
    assert(isa<CallInst>(U) && "Only a call instruction can use _reflect");
    CallInst *Reflect = cast<CallInst>(U);

    assert((Reflect->getNumOperands() == 2) &&
           "Only one operand expected for _reflect function");

    // In CUDA, we will have an extra constant-to-generic conversion of
    // the string.
    const Value *Str = Reflect->getArgOperand(0);
    if (isa<CallInst>(Str)) {
      // CUDA path
      const CallInst *ConvCall = cast<CallInst>(Str);
      Str = ConvCall->getArgOperand(0);
    }
    assert(isa<ConstantExpr>(Str) &&
           "Format of _reflect function not recognized");
    const ConstantExpr *GEP = cast<ConstantExpr>(Str);

    const Value *Sym = GEP->getOperand(0);
    assert(isa<Constant>(Sym) && "Format of _reflect function not recognized");

    const Value *Operand = cast<Constant>(Sym)->getOperand(0);
    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand)) {
      // For CUDA-7.0 style __nvvm_reflect calls we need to find the operand's
      // initializer.
      assert(GV->hasInitializer() &&
             "Format of _reflect function not recognized");
      const Constant *Initializer = GV->getInitializer();
      Operand = Initializer;
    }

    assert(isa<ConstantDataSequential>(Operand) &&
           "Format of _reflect function not recognized");
    assert(cast<ConstantDataSequential>(Operand)->isCString() &&
           "Format of _reflect function not recognized");

    std::string ReflectArg =
        cast<ConstantDataSequential>(Operand)->getAsString();

    // Drop the trailing NUL.
    ReflectArg = ReflectArg.substr(0, ReflectArg.size() - 1);
    DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");

    int ReflectVal = 0; // The default value is 0.
    if (VarMap.find(ReflectArg) != VarMap.end()) {
      ReflectVal = VarMap[ReflectArg];
    }
    Reflect->replaceAllUsesWith(
        ConstantInt::get(Reflect->getType(), ReflectVal));
    ToRemove.push_back(Reflect);
  }

  if (ToRemove.size() == 0)
    return false;

  for (unsigned i = 0, e = ToRemove.size(); i != e; ++i)
    ToRemove[i]->eraseFromParent();
  return true;
}
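Later LLVM versions provide getConstantStringInfo (llvm/Analysis/ValueTracking.h), which performs much of this GEP/global-initializer digging generically. A hedged sketch of that alternative, not what this pass does: it handles the CUDA 6.5-and-earlier form directly, while the CUDA 7.0 addrspacecast constant expression may need an extra stripping step first; the helper name is hypothetical:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Recover the string argument of a __nvvm_reflect call, if possible.
static bool getReflectArg(const CallInst *Reflect, StringRef &Arg) {
  const Value *Str = Reflect->getArgOperand(0);
  // Strip the constant-to-generic conversion call, if present (CUDA <= 6.5).
  if (const CallInst *Conv = dyn_cast<CallInst>(Str))
    Str = Conv->getArgOperand(0);
  // Digs through the GEP to the global's initializer; Arg excludes the NUL.
  return getConstantStringInfo(Str, Arg);
}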
void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
  // Array allocations are probably not worth handling, since an allocation of
  // the array type is the canonical form.
  if (!I.isStaticAlloca() || I.isArrayAllocation())
    return;

  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector.
  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I))
    return;

  DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");

  const Function &ContainingFunction = *I.getParent()->getParent();

  // FIXME: We should also try to get this value from the reqd_work_group_size
  // function attribute if it is available.
  unsigned WorkGroupSize = AMDGPU::getMaximumWorkGroupSize(ContainingFunction);

  int AllocaSize =
      WorkGroupSize * Mod->getDataLayout().getTypeAllocSize(AllocaTy);

  if (AllocaSize > LocalMemAvailable) {
    DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
    return;
  }

  std::vector<Value*> WorkList;

  if (!collectUsesWithPtrTypes(&I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");
  LocalMemAvailable -= AllocaSize;

  Function *F = I.getParent()->getParent();

  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage,
      UndefValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(),
      nullptr,
      GlobalVariable::NotThreadLocal,
      AMDGPUAS::LOCAL_ADDRESS);
  GV->setUnnamedAddr(true);
  GV->setAlignment(I.getAlignment());

  Value *TCntY, *TCntZ;

  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
  Value *TIdX = getWorkitemID(Builder, 0);
  Value *TIdY = getWorkitemID(Builder, 1);
  Value *TIdZ = getWorkitemID(Builder, 2);

  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  Value *Indices[] = {
    Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
    TID
  };

  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  for (Value *V : WorkList) {
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);

      // The operand's value should be corrected on its own.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);
      continue;
    }

    IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
    if (!Intr) {
      // FIXME: What is this for? It doesn't really make sense to promote
      // arbitrary function calls. If the call is to a defined function that
      // can also be promoted, we should be able to do this once that function
      // is also rewritten.
      std::vector<Type*> ArgTypes;
      for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
           ArgIdx != ArgEnd; ++ArgIdx) {
        ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
      }

      Function *F = Call->getCalledFunction();
      FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
                                                F->isVarArg());
      Constant *C = Mod->getOrInsertFunction((F->getName() + ".local").str(),
                                             NewType, F->getAttributes());
      Function *NewF = cast<Function>(C);
      Call->setCalledFunction(NewF);
      continue;
    }

    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only.
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy: {
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
                           MemCpy->getLength(), MemCpy->getAlignment(),
                           MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
      Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getRawSource(),
                            MemMove->getLength(), MemMove->getAlignment(),
                            MemMove->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getAlignment(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::invariant_group_barrier:
      Intr->eraseFromParent();
      // FIXME: I think the invariant marker should still theoretically apply,
      // but the intrinsics need to be changed to accept pointers with any
      // address space.
      continue;
    case Intrinsic::objectsize: {
      Value *Src = Intr->getOperand(0);
      Type *SrcTy = Src->getType()->getPointerElementType();
      Function *ObjectSize = Intrinsic::getDeclaration(Mod,
        Intrinsic::objectsize,
        { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) }
      );

      CallInst *NewCall
        = Builder.CreateCall(ObjectSize, { Src, Intr->getOperand(1) });
      Intr->replaceAllUsesWith(NewCall);
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->dump();
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
}
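The TID expression built above linearizes the three-dimensional workitem coordinate so that every workitem addresses a disjoint element of the shared LDS array. In plain scalar terms, the index the pass computes is:

// Flattened workitem index matching the Tmp0/Tmp1/TID IR built above.
static unsigned flatWorkitemID(unsigned TIdX, unsigned TIdY, unsigned TIdZ,
                               unsigned TCntY, unsigned TCntZ) {
  return TIdX * (TCntY * TCntZ)  // stride of one full X slice
       + TIdY * TCntZ            // stride of one Y row
       + TIdZ;                   // position within the row
}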
bool NVVMReflect::runOnFunction(Function &F) {
  if (!NVVMReflectEnabled)
    return false;

  if (F.getName() == NVVM_REFLECT_FUNCTION) {
    assert(F.isDeclaration() && "_reflect function should not have a body");
    assert(F.getReturnType()->isIntegerTy() &&
           "_reflect's return type should be integer");
    return false;
  }

  SmallVector<Instruction *, 4> ToRemove;

  // Go through the calls in this function. Each call to __nvvm_reflect or
  // llvm.nvvm.reflect should be a CallInst with a ConstantArray argument.
  // First validate that. If the c-string corresponding to the ConstantArray
  // can be found successfully, see if it can be found in VarMap. If so,
  // replace the uses of the CallInst with the value found in VarMap. If not,
  // replace the use with value 0.
  //
  // The IR for __nvvm_reflect calls differs between CUDA versions.
  //
  // CUDA 6.5 and earlier uses this sequence:
  //    %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8
  //        (i8 addrspace(4)* getelementptr inbounds
  //           ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
  //    %reflect = tail call i32 @__nvvm_reflect(i8* %ptr)
  //
  // The value returned by Sym->getOperand(0) is a Constant with a
  // ConstantDataSequential operand which can be converted to string and used
  // for lookup.
  //
  // CUDA 7.0 does it slightly differently:
  //   %reflect = call i32 @__nvvm_reflect(i8* addrspacecast
  //       (i8 addrspace(1)* getelementptr inbounds
  //           ([8 x i8], [8 x i8] addrspace(1)* @str, i32 0, i32 0) to i8*))
  //
  // In this case, we get a Constant with a GlobalVariable operand and we need
  // to dig deeper to find its initializer with the string we'll use for
  // lookup.
  for (Instruction &I : instructions(F)) {
    CallInst *Call = dyn_cast<CallInst>(&I);
    if (!Call)
      continue;
    Function *Callee = Call->getCalledFunction();
    if (!Callee || (Callee->getName() != NVVM_REFLECT_FUNCTION &&
                    Callee->getIntrinsicID() != Intrinsic::nvvm_reflect))
      continue;

    // FIXME: Improve error handling here and elsewhere in this pass.
    assert(Call->getNumOperands() == 2 &&
           "Wrong number of operands to __nvvm_reflect function");

    // In CUDA 6.5 and earlier, we will have an extra constant-to-generic
    // conversion of the string.
    const Value *Str = Call->getArgOperand(0);
    if (const CallInst *ConvCall = dyn_cast<CallInst>(Str)) {
      // FIXME: Add assertions about ConvCall.
      Str = ConvCall->getArgOperand(0);
    }
    assert(isa<ConstantExpr>(Str) &&
           "Format of __nvvm_reflect function not recognized");
    const ConstantExpr *GEP = cast<ConstantExpr>(Str);

    const Value *Sym = GEP->getOperand(0);
    assert(isa<Constant>(Sym) &&
           "Format of __nvvm_reflect function not recognized");

    const Value *Operand = cast<Constant>(Sym)->getOperand(0);
    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Operand)) {
      // For CUDA-7.0 style __nvvm_reflect calls, we need to find the
      // operand's initializer.
      assert(GV->hasInitializer() &&
             "Format of _reflect function not recognized");
      const Constant *Initializer = GV->getInitializer();
      Operand = Initializer;
    }

    assert(isa<ConstantDataSequential>(Operand) &&
           "Format of _reflect function not recognized");
    assert(cast<ConstantDataSequential>(Operand)->isCString() &&
           "Format of _reflect function not recognized");

    StringRef ReflectArg = cast<ConstantDataSequential>(Operand)->getAsString();
    // Drop the trailing NUL.
    ReflectArg = ReflectArg.substr(0, ReflectArg.size() - 1);
    LLVM_DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");

    int ReflectVal = 0; // The default value is 0.
    if (ReflectArg == "__CUDA_FTZ") {
      // Try to pull __CUDA_FTZ from the nvvm-reflect-ftz module flag. Our
      // choice here must be kept in sync with AutoUpgrade, which uses the
      // same technique to detect whether ftz is enabled.
      if (auto *Flag = mdconst::extract_or_null<ConstantInt>(
              F.getParent()->getModuleFlag("nvvm-reflect-ftz")))
        ReflectVal = Flag->getSExtValue();
    } else if (ReflectArg == "__CUDA_ARCH") {
      ReflectVal = SmVersion * 10;
    }
    Call->replaceAllUsesWith(ConstantInt::get(Call->getType(), ReflectVal));
    ToRemove.push_back(Call);
  }

  for (Instruction *I : ToRemove)
    I->eraseFromParent();

  return !ToRemove.empty();
}
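The nvvm-reflect-ftz module flag consulted for __CUDA_FTZ is produced by the front end. A minimal sketch of the producer side, matching the mdconst::extract_or_null<ConstantInt> lookup above (the helper name is hypothetical):

#include "llvm/IR/Module.h"

// Record that denormals should be flushed to zero, so that
// getModuleFlag("nvvm-reflect-ftz") in the pass above yields i32 1.
static void markFTZ(llvm::Module &M) {
  M.addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz", 1);
}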
void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector.
  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I))
    return;

  DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");

  // FIXME: This is the maximum work group size. We should try to get the
  // value from the reqd_work_group_size function attribute if it is
  // available.
  unsigned WorkGroupSize = 256;
  int AllocaSize = WorkGroupSize *
      Mod->getDataLayout()->getTypeAllocSize(AllocaTy);

  if (AllocaSize > LocalMemAvailable) {
    DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
    return;
  }

  std::vector<Value*> WorkList;

  if (!collectUsesWithPtrTypes(&I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");
  LocalMemAvailable -= AllocaSize;

  GlobalVariable *GV = new GlobalVariable(
      *Mod, ArrayType::get(I.getAllocatedType(), 256), false,
      GlobalValue::ExternalLinkage, 0, I.getName(), 0,
      GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);

  FunctionType *FTy = FunctionType::get(
      Type::getInt32Ty(Mod->getContext()), false);
  AttributeSet AttrSet;
  // AttributeSet is immutable; addAttribute returns the updated set. The
  // original code discarded the result, which left the attribute unset.
  AttrSet = AttrSet.addAttribute(Mod->getContext(), 0, Attribute::ReadNone);

  Value *ReadLocalSizeY = Mod->getOrInsertFunction(
      "llvm.r600.read.local.size.y", FTy, AttrSet);
  Value *ReadLocalSizeZ = Mod->getOrInsertFunction(
      "llvm.r600.read.local.size.z", FTy, AttrSet);
  Value *ReadTIDIGX = Mod->getOrInsertFunction(
      "llvm.r600.read.tidig.x", FTy, AttrSet);
  Value *ReadTIDIGY = Mod->getOrInsertFunction(
      "llvm.r600.read.tidig.y", FTy, AttrSet);
  Value *ReadTIDIGZ = Mod->getOrInsertFunction(
      "llvm.r600.read.tidig.z", FTy, AttrSet);

  Value *TCntY = Builder.CreateCall(ReadLocalSizeY);
  Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ);
  Value *TIdX  = Builder.CreateCall(ReadTIDIGX);
  Value *TIdY  = Builder.CreateCall(ReadTIDIGY);
  Value *TIdZ  = Builder.CreateCall(ReadTIDIGZ);

  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  std::vector<Value*> Indices;
  Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext())));
  Indices.push_back(TID);

  Value *Offset = Builder.CreateGEP(GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  for (std::vector<Value*>::iterator i = WorkList.begin(),
       e = WorkList.end(); i != e; ++i) {
    Value *V = *i;
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);

      // The operand's value should be corrected on its own.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);
      continue;
    }

    IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
    if (!Intr) {
      std::vector<Type*> ArgTypes;
      for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
           ArgIdx != ArgEnd; ++ArgIdx) {
        ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
      }

      Function *F = Call->getCalledFunction();
      FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
                                                F->isVarArg());
      Constant *C = Mod->getOrInsertFunction(
          StringRef(F->getName().str() + ".local"), NewType,
          F->getAttributes());
      Function *NewF = cast<Function>(C);
      Call->setCalledFunction(NewF);
      continue;
    }

    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only.
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy: {
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
                           MemCpy->getLength(), MemCpy->getAlignment(),
                           MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getAlignment(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->dump();
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
}
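The r600 reads above all follow the same declare-then-call pattern. A reduced sketch of one of them, assuming the 3.x-era Module::getOrInsertFunction that returns a Constant* (the helper name is hypothetical):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Declare (if needed) and call the X workitem-id read, as done above for
// llvm.r600.read.tidig.x and its siblings.
static Value *emitReadTidigX(Module &M, IRBuilder<> &B) {
  FunctionType *FTy =
      FunctionType::get(Type::getInt32Ty(M.getContext()), false);
  Value *Fn = M.getOrInsertFunction("llvm.r600.read.tidig.x", FTy);
  return B.CreateCall(Fn); // i32 workitem id in the X dimension
}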
bool ObjCARCContract::runOnFunction(Function &F) {
  if (!EnableARCOpts)
    return false;

  // If nothing in the Module uses ARC, don't do anything.
  if (!Run)
    return false;

  Changed = false;
  AA = &getAnalysis<AliasAnalysis>();
  DT = &getAnalysis<DominatorTree>();

  PA.setAA(&getAnalysis<AliasAnalysis>());

  // Track whether it's ok to mark objc_storeStrong calls with the "tail"
  // keyword. Be conservative if the function has variadic arguments.
  // It seems that functions which "return twice" are also unsafe for the
  // "tail" argument, because they are setjmp, which could need to
  // return to an earlier stack state.
  bool TailOkForStoreStrongs = !F.isVarArg() &&
                               !F.callsFunctionThatReturnsTwice();

  // For ObjC library calls which return their argument, replace uses of the
  // argument with uses of the call return value, if it dominates the use.
  // This reduces register pressure.
  SmallPtrSet<Instruction *, 4> DependingInstructions;
  SmallPtrSet<const BasicBlock *, 4> Visited;
  for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
    Instruction *Inst = &*I++;

    DEBUG(dbgs() << "ObjCARCContract: Visiting: " << *Inst << "\n");

    // Only these library routines return their argument. In particular,
    // objc_retainBlock does not necessarily return its argument.
    InstructionClass Class = GetBasicInstructionClass(Inst);
    switch (Class) {
    case IC_Retain:
    case IC_FusedRetainAutorelease:
    case IC_FusedRetainAutoreleaseRV:
      break;
    case IC_Autorelease:
    case IC_AutoreleaseRV:
      if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited))
        continue;
      break;
    case IC_RetainRV: {
      // If we're compiling for a target which needs a special inline-asm
      // marker to do the retainAutoreleasedReturnValue optimization,
      // insert it now.
      if (!RetainRVMarker)
        break;
      BasicBlock::iterator BBI = Inst;
      BasicBlock *InstParent = Inst->getParent();

      // Step up to see if the call immediately precedes the RetainRV call.
      // If it's an invoke, we have to cross a block boundary. And we have
      // to carefully dodge no-op instructions.
      do {
        if (&*BBI == InstParent->begin()) {
          BasicBlock *Pred = InstParent->getSinglePredecessor();
          if (!Pred)
            goto decline_rv_optimization;
          BBI = Pred->getTerminator();
          break;
        }
        --BBI;
      } while (IsNoopInstruction(BBI));

      if (&*BBI == GetObjCArg(Inst)) {
        DEBUG(dbgs() << "ObjCARCContract: Adding inline asm marker for "
                        "retainAutoreleasedReturnValue optimization.\n");
        Changed = true;
        InlineAsm *IA =
          InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()),
                                           /*isVarArg=*/false),
                         RetainRVMarker->getString(),
                         /*Constraints=*/"", /*hasSideEffects=*/true);
        CallInst::Create(IA, "", Inst);
      }
    decline_rv_optimization:
      break;
    }
    case IC_InitWeak: {
      // objc_initWeak(p, null) => *p = null
      CallInst *CI = cast<CallInst>(Inst);
      if (IsNullOrUndef(CI->getArgOperand(1))) {
        Value *Null =
          ConstantPointerNull::get(cast<PointerType>(CI->getType()));
        Changed = true;
        new StoreInst(Null, CI->getArgOperand(0), CI);

        DEBUG(dbgs() << "ObjCARCContract: Old = " << *CI << "\n"
                     << "                 New = " << *Null << "\n");

        CI->replaceAllUsesWith(Null);
        CI->eraseFromParent();
      }
      continue;
    }
    case IC_Release:
      ContractRelease(Inst, I);
      continue;
    case IC_User:
      // Be conservative if the function has any alloca instructions.
      // Technically we only care about escaping alloca instructions,
      // but this is sufficient to handle some interesting cases.
      if (isa<AllocaInst>(Inst))
        TailOkForStoreStrongs = false;
      continue;
    case IC_IntrinsicUser:
      // Remove calls to @clang.arc.use(...).
      Inst->eraseFromParent();
      continue;
    default:
      continue;
    }

    DEBUG(dbgs() << "ObjCARCContract: Finished List.\n\n");

    // Don't use GetObjCArg because we don't want to look through bitcasts
    // and such; to do the replacement, the argument must have type i8*.
    const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
    for (;;) {
      // If we're compiling bugpointed code, don't get in trouble.
      if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
        break;
      // Look through the uses of the pointer.
      for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
           UI != UE; ) {
        Use &U = UI.getUse();
        unsigned OperandNo = UI.getOperandNo();
        ++UI; // Increment UI now, because we may unlink its element.

        // If the call's return value dominates a use of the call's argument
        // value, rewrite the use to use the return value. We check for
        // reachability here because an unreachable call is considered to
        // trivially dominate itself, which would lead us to rewriting its
        // argument in terms of its return value, which would lead to
        // infinite loops in GetObjCArg.
        if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) {
          Changed = true;
          Instruction *Replacement = Inst;
          Type *UseTy = U.get()->getType();
          if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) {
            // For PHI nodes, insert the bitcast in the predecessor block.
            unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
            BasicBlock *BB = PHI->getIncomingBlock(ValNo);
            if (Replacement->getType() != UseTy)
              Replacement = new BitCastInst(Replacement, UseTy, "",
                                            &BB->back());
            // While we're here, rewrite all edges for this PHI, rather
            // than just one use at a time, to minimize the number of
            // bitcasts we emit.
            for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
              if (PHI->getIncomingBlock(i) == BB) {
                // Keep the UI iterator valid.
                if (&PHI->getOperandUse(
                      PHINode::getOperandNumForIncomingValue(i)) ==
                    &UI.getUse())
                  ++UI;
                PHI->setIncomingValue(i, Replacement);
              }
          } else {
            if (Replacement->getType() != UseTy)
              Replacement = new BitCastInst(Replacement, UseTy, "",
                                            cast<Instruction>(U.getUser()));
            U.set(Replacement);
          }
        }
      }

      // If Arg is a no-op casted pointer, strip one level of casts and
      // iterate.
      if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg))
        Arg = BI->getOperand(0);
      else if (isa<GEPOperator>(Arg) &&
               cast<GEPOperator>(Arg)->hasAllZeroIndices())
        Arg = cast<GEPOperator>(Arg)->getPointerOperand();
      else if (isa<GlobalAlias>(Arg) &&
               !cast<GlobalAlias>(Arg)->mayBeOverridden())
        Arg = cast<GlobalAlias>(Arg)->getAliasee();
      else
        break;
    }
  }

  // If this function has no escaping allocas or suspicious vararg usage,
  // objc_storeStrong calls can be marked with the "tail" keyword.
  if (TailOkForStoreStrongs)
    for (SmallPtrSet<CallInst *, 8>::iterator I = StoreStrongCalls.begin(),
         E = StoreStrongCalls.end(); I != E; ++I)
      (*I)->setTailCall();
  StoreStrongCalls.clear();

  return Changed;
}
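Stripped of the bitcast insertion for mismatched types and the PHI special case, the dominance-gated rewrite in the inner loop above reduces to the following sketch (hypothetical helper, 3.x-era use_iterator API):

// Replace every use of Arg that the call Inst dominates with Inst itself,
// skipping uses in unreachable blocks for the reason documented above.
static bool rewriteDominatedUses(Instruction *Inst, Value *Arg,
                                 DominatorTree *DT) {
  bool Changed = false;
  for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
       UI != UE; ) {
    Use &U = UI.getUse();
    ++UI; // Advance first; U may be unlinked by set() below.
    if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) {
      U.set(Inst);
      Changed = true;
    }
  }
  return Changed;
}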