static void EmitCleanup(CodeGenFunction &CGF, EHScopeStack::Cleanup *Fn, EHScopeStack::Cleanup::Flags flags, llvm::Value *ActiveFlag) { // EH cleanups always occur within a terminate scope. if (flags.isForEHCleanup()) CGF.EHStack.pushTerminate(); // If there's an active flag, load it and skip the cleanup if it's // false. llvm::BasicBlock *ContBB = nullptr; if (ActiveFlag) { ContBB = CGF.createBasicBlock("cleanup.done"); llvm::BasicBlock *CleanupBB = CGF.createBasicBlock("cleanup.action"); llvm::Value *IsActive = CGF.Builder.CreateLoad(ActiveFlag, "cleanup.is_active"); CGF.Builder.CreateCondBr(IsActive, CleanupBB, ContBB); CGF.EmitBlock(CleanupBB); } // Ask the cleanup to emit itself. Fn->Emit(CGF, flags); assert(CGF.HaveInsertPoint() && "cleanup ended with no insertion point?"); // Emit the continuation block if there was an active flag. if (ActiveFlag) CGF.EmitBlock(ContBB); // Leave the terminate scope. if (flags.isForEHCleanup()) CGF.EHStack.popTerminate(); }
void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args) { // Emit a call to cudaSetupArgument for each arg in Args. llvm::FunctionCallee cudaSetupArgFn = getSetupArgumentFn(); llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); CharUnits Offset = CharUnits::Zero(); for (const VarDecl *A : Args) { CharUnits TyWidth, TyAlign; std::tie(TyWidth, TyAlign) = CGM.getContext().getTypeInfoInChars(A->getType()); Offset = Offset.alignTo(TyAlign); llvm::Value *Args[] = { CGF.Builder.CreatePointerCast(CGF.GetAddrOfLocalVar(A).getPointer(), VoidPtrTy), llvm::ConstantInt::get(SizeTy, TyWidth.getQuantity()), llvm::ConstantInt::get(SizeTy, Offset.getQuantity()), }; llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args); llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0); llvm::Value *CBZero = CGF.Builder.CreateICmpEQ(CB, Zero); llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next"); CGF.Builder.CreateCondBr(CBZero, NextBlock, EndBlock); CGF.EmitBlock(NextBlock); Offset += TyWidth; } // Emit the call to cudaLaunch llvm::FunctionCallee cudaLaunchFn = getLaunchFn(); llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy); CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg); CGF.EmitBranch(EndBlock); CGF.EmitBlock(EndBlock); }
// Emit suspend expression which roughly looks like: // // auto && x = CommonExpr(); // if (!x.await_ready()) { // llvm_coro_save(); // x.await_suspend(...); (*) // llvm_coro_suspend(); (**) // } // x.await_resume(); // // where the result of the entire expression is the result of x.await_resume() // // (*) If x.await_suspend return type is bool, it allows to veto a suspend: // if (x.await_suspend(...)) // llvm_coro_suspend(); // // (**) llvm_coro_suspend() encodes three possible continuations as // a switch instruction: // // %where-to = call i8 @llvm.coro.suspend(...) // switch i8 %where-to, label %coro.ret [ ; jump to epilogue to suspend // i8 0, label %yield.ready ; go here when resumed // i8 1, label %yield.cleanup ; go here when destroyed // ] // // See llvm's docs/Coroutines.rst for more details. // static RValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Coro, CoroutineSuspendExpr const &S, AwaitKind Kind, AggValueSlot aggSlot, bool ignoreResult) { auto *E = S.getCommonExpr(); auto Binder = CodeGenFunction::OpaqueValueMappingData::bind(CGF, S.getOpaqueValue(), E); auto UnbindOnExit = llvm::make_scope_exit([&] { Binder.unbind(CGF); }); auto Prefix = buildSuspendPrefixStr(Coro, Kind); BasicBlock *ReadyBlock = CGF.createBasicBlock(Prefix + Twine(".ready")); BasicBlock *SuspendBlock = CGF.createBasicBlock(Prefix + Twine(".suspend")); BasicBlock *CleanupBlock = CGF.createBasicBlock(Prefix + Twine(".cleanup")); // If expression is ready, no need to suspend. CGF.EmitBranchOnBoolExpr(S.getReadyExpr(), ReadyBlock, SuspendBlock, 0); // Otherwise, emit suspend logic. CGF.EmitBlock(SuspendBlock); auto &Builder = CGF.Builder; llvm::Function *CoroSave = CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_save); auto *NullPtr = llvm::ConstantPointerNull::get(CGF.CGM.Int8PtrTy); auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr}); auto *SuspendRet = CGF.EmitScalarExpr(S.getSuspendExpr()); if (SuspendRet != nullptr) { // Veto suspension if requested by bool returning await_suspend. assert(SuspendRet->getType()->isIntegerTy(1) && "Sema should have already checked that it is void or bool"); BasicBlock *RealSuspendBlock = CGF.createBasicBlock(Prefix + Twine(".suspend.bool")); CGF.Builder.CreateCondBr(SuspendRet, RealSuspendBlock, ReadyBlock); SuspendBlock = RealSuspendBlock; CGF.EmitBlock(RealSuspendBlock); } // Emit the suspend point. const bool IsFinalSuspend = (Kind == AwaitKind::Final); llvm::Function *CoroSuspend = CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_suspend); auto *SuspendResult = Builder.CreateCall( CoroSuspend, {SaveCall, Builder.getInt1(IsFinalSuspend)}); // Create a switch capturing three possible continuations. auto *Switch = Builder.CreateSwitch(SuspendResult, Coro.SuspendBB, 2); Switch->addCase(Builder.getInt8(0), ReadyBlock); Switch->addCase(Builder.getInt8(1), CleanupBlock); // Emit cleanup for this suspend point. CGF.EmitBlock(CleanupBlock); CGF.EmitBranchThroughCleanup(Coro.CleanupJD); // Emit await_resume expression. CGF.EmitBlock(ReadyBlock); return CGF.EmitAnyExpr(S.getResumeExpr(), aggSlot, ignoreResult); }
static void EmitOMPIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond, const std::function<void()> &BodyOpGen) { llvm::Value *CallBool = CGF.EmitScalarConversion( IfCond, CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true), CGF.getContext().BoolTy); auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); auto *ContBlock = CGF.createBasicBlock("omp_if.end"); // Generate the branch (If-stmt) CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); CGF.EmitBlock(ThenBlock); BodyOpGen(); // Emit the rest of bblocks/branches CGF.EmitBranch(ContBlock); CGF.EmitBlock(ContBlock, true); }
void CGNVCUDARuntime::EmitDeviceStubBody(CodeGenFunction &CGF, FunctionArgList &Args) { // Build the argument value list and the argument stack struct type. SmallVector<llvm::Value *, 16> ArgValues; std::vector<llvm::Type *> ArgTypes; for (FunctionArgList::const_iterator I = Args.begin(), E = Args.end(); I != E; ++I) { llvm::Value *V = CGF.GetAddrOfLocalVar(*I); ArgValues.push_back(V); assert(isa<llvm::PointerType>(V->getType()) && "Arg type not PointerType"); ArgTypes.push_back(cast<llvm::PointerType>(V->getType())->getElementType()); } llvm::StructType *ArgStackTy = llvm::StructType::get( CGF.getLLVMContext(), ArgTypes); llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); // Emit the calls to cudaSetupArgument llvm::Constant *cudaSetupArgFn = getSetupArgumentFn(); for (unsigned I = 0, E = Args.size(); I != E; ++I) { llvm::Value *Args[3]; llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next"); Args[0] = CGF.Builder.CreatePointerCast(ArgValues[I], VoidPtrTy); Args[1] = CGF.Builder.CreateIntCast( llvm::ConstantExpr::getSizeOf(ArgTypes[I]), SizeTy, false); Args[2] = CGF.Builder.CreateIntCast( llvm::ConstantExpr::getOffsetOf(ArgStackTy, I), SizeTy, false); llvm::CallSite CS = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args); llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0); llvm::Value *CSZero = CGF.Builder.CreateICmpEQ(CS.getInstruction(), Zero); CGF.Builder.CreateCondBr(CSZero, NextBlock, EndBlock); CGF.EmitBlock(NextBlock); } // Emit the call to cudaLaunch llvm::Constant *cudaLaunchFn = getLaunchFn(); llvm::Value *Arg = CGF.Builder.CreatePointerCast(CGF.CurFn, CharPtrTy); CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg); CGF.EmitBranch(EndBlock); CGF.EmitBlock(EndBlock); }
static RValue PerformReturnAdjustment(CodeGenFunction &CGF, QualType ResultType, RValue RV, const ThunkInfo &Thunk) { // Emit the return adjustment. bool NullCheckValue = !ResultType->isReferenceType(); llvm::BasicBlock *AdjustNull = nullptr; llvm::BasicBlock *AdjustNotNull = nullptr; llvm::BasicBlock *AdjustEnd = nullptr; llvm::Value *ReturnValue = RV.getScalarVal(); if (NullCheckValue) { AdjustNull = CGF.createBasicBlock("adjust.null"); AdjustNotNull = CGF.createBasicBlock("adjust.notnull"); AdjustEnd = CGF.createBasicBlock("adjust.end"); llvm::Value *IsNull = CGF.Builder.CreateIsNull(ReturnValue); CGF.Builder.CreateCondBr(IsNull, AdjustNull, AdjustNotNull); CGF.EmitBlock(AdjustNotNull); } auto ClassDecl = ResultType->getPointeeType()->getAsCXXRecordDecl(); auto ClassAlign = CGF.CGM.getClassPointerAlignment(ClassDecl); ReturnValue = CGF.CGM.getCXXABI().performReturnAdjustment(CGF, Address(ReturnValue, ClassAlign), Thunk.Return); if (NullCheckValue) { CGF.Builder.CreateBr(AdjustEnd); CGF.EmitBlock(AdjustNull); CGF.Builder.CreateBr(AdjustEnd); CGF.EmitBlock(AdjustEnd); llvm::PHINode *PHI = CGF.Builder.CreatePHI(ReturnValue->getType(), 2); PHI->addIncoming(ReturnValue, AdjustNotNull); PHI->addIncoming(llvm::Constant::getNullValue(ReturnValue->getType()), AdjustNull); ReturnValue = PHI; } return RValue::get(ReturnValue); }
static RValue PerformReturnAdjustment(CodeGenFunction &CGF, QualType ResultType, RValue RV, const ThunkInfo &Thunk) { // Emit the return adjustment. bool NullCheckValue = !ResultType->isReferenceType(); llvm::BasicBlock *AdjustNull = 0; llvm::BasicBlock *AdjustNotNull = 0; llvm::BasicBlock *AdjustEnd = 0; llvm::Value *ReturnValue = RV.getScalarVal(); if (NullCheckValue) { AdjustNull = CGF.createBasicBlock("adjust.null"); AdjustNotNull = CGF.createBasicBlock("adjust.notnull"); AdjustEnd = CGF.createBasicBlock("adjust.end"); llvm::Value *IsNull = CGF.Builder.CreateIsNull(ReturnValue); CGF.Builder.CreateCondBr(IsNull, AdjustNull, AdjustNotNull); CGF.EmitBlock(AdjustNotNull); } ReturnValue = PerformTypeAdjustment(CGF, ReturnValue, Thunk.Return.NonVirtual, Thunk.Return.VBaseOffsetOffset, /*IsReturnAdjustment*/true); if (NullCheckValue) { CGF.Builder.CreateBr(AdjustEnd); CGF.EmitBlock(AdjustNull); CGF.Builder.CreateBr(AdjustEnd); CGF.EmitBlock(AdjustEnd); llvm::PHINode *PHI = CGF.Builder.CreatePHI(ReturnValue->getType(), 2); PHI->addIncoming(ReturnValue, AdjustNotNull); PHI->addIncoming(llvm::Constant::getNullValue(ReturnValue->getType()), AdjustNull); ReturnValue = PHI; } return RValue::get(ReturnValue); }
/// emitNonZeroVLAInit - Emit the "zero" initialization of a /// variable-length array whose elements have a non-zero bit-pattern. /// /// \param src - a char* pointing to the bit-pattern for a single /// base element of the array /// \param sizeInChars - the total size of the VLA, in chars /// \param align - the total alignment of the VLA static void emitNonZeroVLAInit(CodeGenFunction &CGF, QualType baseType, llvm::Value *dest, llvm::Value *src, llvm::Value *sizeInChars) { std::pair<CharUnits,CharUnits> baseSizeAndAlign = CGF.getContext().getTypeInfoInChars(baseType); CGBuilderTy &Builder = CGF.Builder; llvm::Value *baseSizeInChars = llvm::ConstantInt::get(CGF.IntPtrTy, baseSizeAndAlign.first.getQuantity()); llvm::Type *i8p = Builder.getInt8PtrTy(); llvm::Value *begin = Builder.CreateBitCast(dest, i8p, "vla.begin"); llvm::Value *end = Builder.CreateInBoundsGEP(dest, sizeInChars, "vla.end"); llvm::BasicBlock *originBB = CGF.Builder.GetInsertBlock(); llvm::BasicBlock *loopBB = CGF.createBasicBlock("vla-init.loop"); llvm::BasicBlock *contBB = CGF.createBasicBlock("vla-init.cont"); // Make a loop over the VLA. C99 guarantees that the VLA element // count must be nonzero. CGF.EmitBlock(loopBB); llvm::PHINode *cur = Builder.CreatePHI(i8p, 2, "vla.cur"); cur->addIncoming(begin, originBB); // memcpy the individual element bit-pattern. Builder.CreateMemCpy(cur, src, baseSizeInChars, baseSizeAndAlign.second.getQuantity(), /*volatile*/ false); // Go to the next element. llvm::Value *next = Builder.CreateConstInBoundsGEP1_32(cur, 1, "vla.next"); // Leave if that's the end of the VLA. llvm::Value *done = Builder.CreateICmpEQ(next, end, "vla-init.isdone"); Builder.CreateCondBr(done, contBB, loopBB); cur->addIncoming(next, loopBB); CGF.EmitBlock(contBB); }
/// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen /// function. Here is the logic: /// if (Cond) { /// CodeGen(true); /// } else { /// CodeGen(false); /// } static void EmitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, const std::function<void(bool)> &CodeGen) { CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); // If the condition constant folds and can be elided, try to avoid emitting // the condition and the dead arm of the if/else. bool CondConstant; if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { CodeGen(CondConstant); return; } // Otherwise, the condition did not fold, or we couldn't elide it. Just // emit the conditional branch. auto ThenBlock = CGF.createBasicBlock(/*name*/ "omp_if.then"); auto ElseBlock = CGF.createBasicBlock(/*name*/ "omp_if.else"); auto ContBlock = CGF.createBasicBlock(/*name*/ "omp_if.end"); CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount*/ 0); // Emit the 'then' code. CGF.EmitBlock(ThenBlock); CodeGen(/*ThenBlock*/ true); CGF.EmitBranch(ContBlock); // Emit the 'else' code if present. { // There is no need to emit line number for unconditional branch. SuppressDebugLocation SDL(CGF.Builder); CGF.EmitBlock(ElseBlock); } CodeGen(/*ThenBlock*/ false); { // There is no need to emit line number for unconditional branch. SuppressDebugLocation SDL(CGF.Builder); CGF.EmitBranch(ContBlock); } // Emit the continuation block for code after the if. CGF.EmitBlock(ContBlock, /*IsFinished*/ true); }
static void EmitCleanup(CodeGenFunction &CGF, EHScopeStack::Cleanup *Fn, EHScopeStack::Cleanup::Flags flags, llvm::Value *ActiveFlag) { // Itanium EH cleanups occur within a terminate scope. Microsoft SEH doesn't // have this behavior, and the Microsoft C++ runtime will call terminate for // us if the cleanup throws. bool PushedTerminate = false; if (flags.isForEHCleanup() && !CGF.getTarget().getCXXABI().isMicrosoft()) { CGF.EHStack.pushTerminate(); PushedTerminate = true; } // If there's an active flag, load it and skip the cleanup if it's // false. llvm::BasicBlock *ContBB = nullptr; if (ActiveFlag) { ContBB = CGF.createBasicBlock("cleanup.done"); llvm::BasicBlock *CleanupBB = CGF.createBasicBlock("cleanup.action"); llvm::Value *IsActive = CGF.Builder.CreateLoad(ActiveFlag, "cleanup.is_active"); CGF.Builder.CreateCondBr(IsActive, CleanupBB, ContBB); CGF.EmitBlock(CleanupBB); } // Ask the cleanup to emit itself. Fn->Emit(CGF, flags); assert(CGF.HaveInsertPoint() && "cleanup ended with no insertion point?"); // Emit the continuation block if there was an active flag. if (ActiveFlag) CGF.EmitBlock(ContBB); // Leave the terminate scope. if (PushedTerminate) CGF.EHStack.popTerminate(); }
void CGObjCRuntime::EmitTryCatchStmt(CodeGenFunction &CGF, const ObjCAtTryStmt &S, llvm::Constant *beginCatchFn, llvm::Constant *endCatchFn, llvm::Constant *exceptionRethrowFn) { // Jump destination for falling out of catch bodies. CodeGenFunction::JumpDest Cont; if (S.getNumCatchStmts()) Cont = CGF.getJumpDestInCurrentScope("eh.cont"); CodeGenFunction::FinallyInfo FinallyInfo; if (const ObjCAtFinallyStmt *Finally = S.getFinallyStmt()) FinallyInfo.enter(CGF, Finally->getFinallyBody(), beginCatchFn, endCatchFn, exceptionRethrowFn); SmallVector<CatchHandler, 8> Handlers; // Enter the catch, if there is one. if (S.getNumCatchStmts()) { for (unsigned I = 0, N = S.getNumCatchStmts(); I != N; ++I) { const ObjCAtCatchStmt *CatchStmt = S.getCatchStmt(I); const VarDecl *CatchDecl = CatchStmt->getCatchParamDecl(); Handlers.push_back(CatchHandler()); CatchHandler &Handler = Handlers.back(); Handler.Variable = CatchDecl; Handler.Body = CatchStmt->getCatchBody(); Handler.Block = CGF.createBasicBlock("catch"); // @catch(...) always matches. if (!CatchDecl) { Handler.TypeInfo = 0; // catch-all // Don't consider any other catches. break; } Handler.TypeInfo = GetEHType(CatchDecl->getType()); } EHCatchScope *Catch = CGF.EHStack.pushCatch(Handlers.size()); for (unsigned I = 0, E = Handlers.size(); I != E; ++I) Catch->setHandler(I, Handlers[I].TypeInfo, Handlers[I].Block); } // Emit the try body. CGF.EmitStmt(S.getTryBody()); // Leave the try. if (S.getNumCatchStmts()) CGF.popCatchScope(); // Remember where we were. CGBuilderTy::InsertPoint SavedIP = CGF.Builder.saveAndClearIP(); // Emit the handlers. for (unsigned I = 0, E = Handlers.size(); I != E; ++I) { CatchHandler &Handler = Handlers[I]; CGF.EmitBlock(Handler.Block); llvm::Value *RawExn = CGF.getExceptionFromSlot(); // Enter the catch. llvm::Value *Exn = RawExn; if (beginCatchFn) { Exn = CGF.Builder.CreateCall(beginCatchFn, RawExn, "exn.adjusted"); cast<llvm::CallInst>(Exn)->setDoesNotThrow(); } CodeGenFunction::RunCleanupsScope cleanups(CGF); if (endCatchFn) { // Add a cleanup to leave the catch. bool EndCatchMightThrow = (Handler.Variable == 0); CGF.EHStack.pushCleanup<CallObjCEndCatch>(NormalAndEHCleanup, EndCatchMightThrow, endCatchFn); } // Bind the catch parameter if it exists. if (const VarDecl *CatchParam = Handler.Variable) { llvm::Type *CatchType = CGF.ConvertType(CatchParam->getType()); llvm::Value *CastExn = CGF.Builder.CreateBitCast(Exn, CatchType); CGF.EmitAutoVarDecl(*CatchParam); CGF.Builder.CreateStore(CastExn, CGF.GetAddrOfLocalVar(CatchParam)); } CGF.ObjCEHValueStack.push_back(Exn); CGF.EmitStmt(Handler.Body); CGF.ObjCEHValueStack.pop_back(); // Leave any cleanups associated with the catch. cleanups.ForceCleanup(); CGF.EmitBranchThroughCleanup(Cont); } // Go back to the try-statement fallthrough. CGF.Builder.restoreIP(SavedIP); // Pop out of the finally. if (S.getFinallyStmt()) FinallyInfo.exit(CGF); if (Cont.isValid()) CGF.EmitBlock(Cont.getBlock()); }
/// The ARM code here follows the Itanium code closely enough that we /// just special-case it at particular places. void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, llvm::GlobalVariable *GV) { CGBuilderTy &Builder = CGF.Builder; // We only need to use thread-safe statics for local variables; // global initialization is always single-threaded. bool threadsafe = (getContext().getLangOptions().ThreadsafeStatics && D.isLocalVarDecl()); llvm::IntegerType *GuardTy; // If we have a global variable with internal linkage and thread-safe statics // are disabled, we can just let the guard variable be of type i8. bool useInt8GuardVariable = !threadsafe && GV->hasInternalLinkage(); if (useInt8GuardVariable) { GuardTy = CGF.Int8Ty; } else { // Guard variables are 64 bits in the generic ABI and 32 bits on ARM. GuardTy = (IsARM ? CGF.Int32Ty : CGF.Int64Ty); } llvm::PointerType *GuardPtrTy = GuardTy->getPointerTo(); // Create the guard variable. llvm::SmallString<256> GuardVName; llvm::raw_svector_ostream Out(GuardVName); getMangleContext().mangleItaniumGuardVariable(&D, Out); Out.flush(); // Just absorb linkage and visibility from the variable. llvm::GlobalVariable *GuardVariable = new llvm::GlobalVariable(CGM.getModule(), GuardTy, false, GV->getLinkage(), llvm::ConstantInt::get(GuardTy, 0), GuardVName.str()); GuardVariable->setVisibility(GV->getVisibility()); // Test whether the variable has completed initialization. llvm::Value *IsInitialized; // ARM C++ ABI 3.2.3.1: // To support the potential use of initialization guard variables // as semaphores that are the target of ARM SWP and LDREX/STREX // synchronizing instructions we define a static initialization // guard variable to be a 4-byte aligned, 4- byte word with the // following inline access protocol. // #define INITIALIZED 1 // if ((obj_guard & INITIALIZED) != INITIALIZED) { // if (__cxa_guard_acquire(&obj_guard)) // ... // } if (IsARM && !useInt8GuardVariable) { llvm::Value *V = Builder.CreateLoad(GuardVariable); V = Builder.CreateAnd(V, Builder.getInt32(1)); IsInitialized = Builder.CreateIsNull(V, "guard.uninitialized"); // Itanium C++ ABI 3.3.2: // The following is pseudo-code showing how these functions can be used: // if (obj_guard.first_byte == 0) { // if ( __cxa_guard_acquire (&obj_guard) ) { // try { // ... initialize the object ...; // } catch (...) { // __cxa_guard_abort (&obj_guard); // throw; // } // ... queue object destructor with __cxa_atexit() ...; // __cxa_guard_release (&obj_guard); // } // } } else { // Load the first byte of the guard variable. llvm::Type *PtrTy = Builder.getInt8PtrTy(); llvm::LoadInst *LI = Builder.CreateLoad(Builder.CreateBitCast(GuardVariable, PtrTy)); LI->setAlignment(1); // Itanium ABI: // An implementation supporting thread-safety on multiprocessor // systems must also guarantee that references to the initialized // object do not occur before the load of the initialization flag. // // In LLVM, we do this by marking the load Acquire. if (threadsafe) LI->setAtomic(llvm::Acquire); IsInitialized = Builder.CreateIsNull(LI, "guard.uninitialized"); } llvm::BasicBlock *InitCheckBlock = CGF.createBasicBlock("init.check"); llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end"); // Check if the first byte of the guard variable is zero. Builder.CreateCondBr(IsInitialized, InitCheckBlock, EndBlock); CGF.EmitBlock(InitCheckBlock); // Variables used when coping with thread-safe statics and exceptions. if (threadsafe) { // Call __cxa_guard_acquire. llvm::Value *V = Builder.CreateCall(getGuardAcquireFn(CGM, GuardPtrTy), GuardVariable); llvm::BasicBlock *InitBlock = CGF.createBasicBlock("init"); Builder.CreateCondBr(Builder.CreateIsNotNull(V, "tobool"), InitBlock, EndBlock); // Call __cxa_guard_abort along the exceptional edge. CGF.EHStack.pushCleanup<CallGuardAbort>(EHCleanup, GuardVariable); CGF.EmitBlock(InitBlock); } // Emit the initializer and add a global destructor if appropriate. CGF.EmitCXXGlobalVarDeclInit(D, GV); if (threadsafe) { // Pop the guard-abort cleanup if we pushed one. CGF.PopCleanupBlock(); // Call __cxa_guard_release. This cannot throw. Builder.CreateCall(getGuardReleaseFn(CGM, GuardPtrTy), GuardVariable); } else { Builder.CreateStore(llvm::ConstantInt::get(GuardTy, 1), GuardVariable); } CGF.EmitBlock(EndBlock); }
// CopyObject - Utility to copy an object. Calls copy constructor as necessary. // DestPtr is casted to the right type. static void CopyObject(CodeGenFunction &CGF, const Expr *E, llvm::Value *DestPtr, llvm::Value *ExceptionPtrPtr) { QualType ObjectType = E->getType(); // Store the throw exception in the exception object. if (!CGF.hasAggregateLLVMType(ObjectType)) { llvm::Value *Value = CGF.EmitScalarExpr(E); const llvm::Type *ValuePtrTy = Value->getType()->getPointerTo(); CGF.Builder.CreateStore(Value, CGF.Builder.CreateBitCast(DestPtr, ValuePtrTy)); } else { const llvm::Type *Ty = CGF.ConvertType(ObjectType)->getPointerTo(); const CXXRecordDecl *RD = cast<CXXRecordDecl>(ObjectType->getAs<RecordType>()->getDecl()); llvm::Value *This = CGF.Builder.CreateBitCast(DestPtr, Ty); if (RD->hasTrivialCopyConstructor()) { CGF.EmitAggExpr(E, This, false); } else if (CXXConstructorDecl *CopyCtor = RD->getCopyConstructor(CGF.getContext(), 0)) { llvm::Value *CondPtr = 0; if (CGF.Exceptions) { CodeGenFunction::EHCleanupBlock Cleanup(CGF); llvm::Constant *FreeExceptionFn = getFreeExceptionFn(CGF); llvm::BasicBlock *CondBlock = CGF.createBasicBlock("cond.free"); llvm::BasicBlock *Cont = CGF.createBasicBlock("cont"); CondPtr = CGF.CreateTempAlloca(llvm::Type::getInt1Ty(CGF.getLLVMContext()), "doEHfree"); CGF.Builder.CreateCondBr(CGF.Builder.CreateLoad(CondPtr), CondBlock, Cont); CGF.EmitBlock(CondBlock); // Load the exception pointer. llvm::Value *ExceptionPtr = CGF.Builder.CreateLoad(ExceptionPtrPtr); CGF.Builder.CreateCall(FreeExceptionFn, ExceptionPtr); CGF.EmitBlock(Cont); } if (CondPtr) CGF.Builder.CreateStore(llvm::ConstantInt::getTrue(CGF.getLLVMContext()), CondPtr); llvm::Value *Src = CGF.EmitLValue(E).getAddress(); if (CondPtr) CGF.Builder.CreateStore(llvm::ConstantInt::getFalse(CGF.getLLVMContext()), CondPtr); llvm::BasicBlock *TerminateHandler = CGF.getTerminateHandler(); llvm::BasicBlock *PrevLandingPad = CGF.getInvokeDest(); CGF.setInvokeDest(TerminateHandler); // Stolen from EmitClassAggrMemberwiseCopy llvm::Value *Callee = CGF.CGM.GetAddrOfCXXConstructor(CopyCtor, Ctor_Complete); CallArgList CallArgs; CallArgs.push_back(std::make_pair(RValue::get(This), CopyCtor->getThisType(CGF.getContext()))); // Push the Src ptr. CallArgs.push_back(std::make_pair(RValue::get(Src), CopyCtor->getParamDecl(0)->getType())); const FunctionProtoType *FPT = CopyCtor->getType()->getAs<FunctionProtoType>(); CGF.EmitCall(CGF.CGM.getTypes().getFunctionInfo(CallArgs, FPT), Callee, ReturnValueSlot(), CallArgs, CopyCtor); CGF.setInvokeDest(PrevLandingPad); } else llvm_unreachable("uncopyable object"); } }
// CUDA 9.0+ uses new way to launch kernels. Parameters are packed in a local // array and kernels are launched using cudaLaunchKernel(). void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args) { // Build the shadow stack entry at the very start of the function. // Calculate amount of space we will need for all arguments. If we have no // args, allocate a single pointer so we still have a valid pointer to the // argument array that we can pass to runtime, even if it will be unused. Address KernelArgs = CGF.CreateTempAlloca( VoidPtrTy, CharUnits::fromQuantity(16), "kernel_args", llvm::ConstantInt::get(SizeTy, std::max<size_t>(1, Args.size()))); // Store pointers to the arguments in a locally allocated launch_args. for (unsigned i = 0; i < Args.size(); ++i) { llvm::Value* VarPtr = CGF.GetAddrOfLocalVar(Args[i]).getPointer(); llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, VoidPtrTy); CGF.Builder.CreateDefaultAlignedStore( VoidVarPtr, CGF.Builder.CreateConstGEP1_32(KernelArgs.getPointer(), i)); } llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end"); // Lookup cudaLaunchKernel function. // cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, // void **args, size_t sharedMem, // cudaStream_t stream); TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); IdentifierInfo &cudaLaunchKernelII = CGM.getContext().Idents.get("cudaLaunchKernel"); FunctionDecl *cudaLaunchKernelFD = nullptr; for (const auto &Result : DC->lookup(&cudaLaunchKernelII)) { if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result)) cudaLaunchKernelFD = FD; } if (cudaLaunchKernelFD == nullptr) { CGM.Error(CGF.CurFuncDecl->getLocation(), "Can't find declaration for cudaLaunchKernel()"); return; } // Create temporary dim3 grid_dim, block_dim. ParmVarDecl *GridDimParam = cudaLaunchKernelFD->getParamDecl(1); QualType Dim3Ty = GridDimParam->getType(); Address GridDim = CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "grid_dim"); Address BlockDim = CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "block_dim"); Address ShmemSize = CGF.CreateTempAlloca(SizeTy, CGM.getSizeAlign(), "shmem_size"); Address Stream = CGF.CreateTempAlloca(VoidPtrTy, CGM.getPointerAlign(), "stream"); llvm::FunctionCallee cudaPopConfigFn = CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, {/*gridDim=*/GridDim.getType(), /*blockDim=*/BlockDim.getType(), /*ShmemSize=*/ShmemSize.getType(), /*Stream=*/Stream.getType()}, /*isVarArg=*/false), "__cudaPopCallConfiguration"); CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn, {GridDim.getPointer(), BlockDim.getPointer(), ShmemSize.getPointer(), Stream.getPointer()}); // Emit the call to cudaLaunch llvm::Value *Kernel = CGF.Builder.CreatePointerCast(CGF.CurFn, VoidPtrTy); CallArgList LaunchKernelArgs; LaunchKernelArgs.add(RValue::get(Kernel), cudaLaunchKernelFD->getParamDecl(0)->getType()); LaunchKernelArgs.add(RValue::getAggregate(GridDim), Dim3Ty); LaunchKernelArgs.add(RValue::getAggregate(BlockDim), Dim3Ty); LaunchKernelArgs.add(RValue::get(KernelArgs.getPointer()), cudaLaunchKernelFD->getParamDecl(3)->getType()); LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(ShmemSize)), cudaLaunchKernelFD->getParamDecl(4)->getType()); LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(Stream)), cudaLaunchKernelFD->getParamDecl(5)->getType()); QualType QT = cudaLaunchKernelFD->getType(); QualType CQT = QT.getCanonicalType(); llvm::Type *Ty = CGM.getTypes().ConvertType(CQT); llvm::FunctionType *FTy = dyn_cast<llvm::FunctionType>(Ty); const CGFunctionInfo &FI = CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD); llvm::FunctionCallee cudaLaunchKernelFn = CGM.CreateRuntimeFunction(FTy, "cudaLaunchKernel"); CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(), LaunchKernelArgs); CGF.EmitBranch(EndBlock); CGF.EmitBlock(EndBlock); }