void CodeGenFunction::EmitOMPSimdFinal(const OMPLoopDirective &S) { auto IC = S.counters().begin(); for (auto F : S.finals()) { if (LocalDeclMap.lookup(cast<DeclRefExpr>((*IC))->getDecl())) { EmitIgnoredExpr(F); } ++IC; } }
void CodeGenFunction::EmitOMPInnerLoop(const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool SeparateIter) { auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); auto Cnt = getPGORegionCounter(&S); // Start the loop with a block that tests the condition. auto CondBlock = createBasicBlock("omp.inner.for.cond"); EmitBlock(CondBlock); LoopStack.push(CondBlock); // If there are any cleanups between here and the loop-exit scope, // create a block to stage a loop exit along. auto ExitBlock = LoopExit.getBlock(); if (LoopScope.requiresCleanups()) ExitBlock = createBasicBlock("omp.inner.for.cond.cleanup"); auto LoopBody = createBasicBlock("omp.inner.for.body"); // Emit condition: "IV < LastIteration + 1 [ - 1]" // ("- 1" when lastprivate clause is present - separate one iteration). llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond(SeparateIter)); Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock, PGO.createLoopWeights(S.getCond(SeparateIter), Cnt)); if (ExitBlock != LoopExit.getBlock()) { EmitBlock(ExitBlock); EmitBranchThroughCleanup(LoopExit); } EmitBlock(LoopBody); Cnt.beginRegion(Builder); // Create a block for the increment. auto Continue = getJumpDestInCurrentScope("omp.inner.for.inc"); BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); EmitOMPLoopBody(S); EmitStopPoint(&S); // Emit "IV = IV + 1" and a back-edge to the condition block. EmitBlock(Continue.getBlock()); EmitIgnoredExpr(S.getInc()); BreakContinueStack.pop_back(); EmitBranch(CondBlock); LoopStack.pop(); // Emit the fall-through block. EmitBlock(LoopExit.getBlock()); }
void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &S, bool SeparateIter) { RunCleanupsScope BodyScope(*this); // Update counters values on current iteration. for (auto I : S.updates()) { EmitIgnoredExpr(I); } // On a continue in the body, jump to the end. auto Continue = getJumpDestInCurrentScope("omp.body.continue"); BreakContinueStack.push_back(BreakContinue(JumpDest(), Continue)); // Emit loop body. EmitStmt(S.getBody()); // The end (updates/cleanups). EmitBlock(Continue.getBlock()); BreakContinueStack.pop_back(); if (SeparateIter) { // TODO: Update lastprivates if the SeparateIter flag is true. // This will be implemented in a follow-up OMPLastprivateClause patch, but // result should be still correct without it, as we do not make these // variables private yet. } }
void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { // Emit the loop iteration variable. auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable()); auto IVDecl = cast<VarDecl>(IVExpr->getDecl()); EmitVarDecl(*IVDecl); // Emit the iterations count variable. // If it is not a variable, Sema decided to calculate iterations count on each // iteration (e.g., it is foldable into a constant). if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); // Emit calculation of the iterations count. EmitIgnoredExpr(S.getCalcLastIteration()); } auto &RT = CGM.getOpenMPRuntime(); // Check pre-condition. { // Skip the entire loop if we don't meet the precondition. RegionCounter Cnt = getPGORegionCounter(&S); auto ThenBlock = createBasicBlock("omp.precond.then"); auto ContBlock = createBasicBlock("omp.precond.end"); EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount()); EmitBlock(ThenBlock); Cnt.beginRegion(Builder); // Emit 'then' code. { // Emit helper vars inits. LValue LB = EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable())); LValue UB = EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable())); LValue ST = EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable())); LValue IL = EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable())); OMPPrivateScope LoopScope(*this); EmitPrivateLoopCounters(*this, LoopScope, S.counters()); // Detect the loop schedule kind and chunk. auto ScheduleKind = OMPC_SCHEDULE_unknown; llvm::Value *Chunk = nullptr; if (auto C = cast_or_null<OMPScheduleClause>( S.getSingleClause(OMPC_schedule))) { ScheduleKind = C->getScheduleKind(); if (auto Ch = C->getChunkSize()) { Chunk = EmitScalarExpr(Ch); Chunk = EmitScalarConversion(Chunk, Ch->getType(), S.getIterationVariable()->getType()); } } const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); if (RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr)) { // OpenMP [2.7.1, Loop Construct, Description, table 2-1] // When no chunk_size is specified, the iteration space is divided into // chunks that are approximately equal in size, and at most one chunk is // distributed to each thread. Note that the size of the chunks is // unspecified in this case. RT.EmitOMPForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL.getAddress(), LB.getAddress(), UB.getAddress(), ST.getAddress()); // UB = min(UB, GlobalUB); EmitIgnoredExpr(S.getEnsureUpperBound()); // IV = LB; EmitIgnoredExpr(S.getInit()); // while (idx <= UB) { BODY; ++idx; } EmitOMPInnerLoop(S, LoopScope); // Tell the runtime we are done. RT.EmitOMPForFinish(*this, S.getLocStart(), ScheduleKind); } else { // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. EmitOMPForOuterLoop(ScheduleKind, S, LoopScope, LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(), Chunk); } } // We're now done with the loop, so jump to the continuation block. EmitBranch(ContBlock); EmitBlock(ContBlock, true); } }
void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, OMPPrivateScope &LoopScope, llvm::Value *LB, llvm::Value *UB, llvm::Value *ST, llvm::Value *IL, llvm::Value *Chunk) { auto &RT = CGM.getOpenMPRuntime(); assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) && "static non-chunked schedule does not need outer loop"); if (RT.isDynamic(ScheduleKind)) { ErrorUnsupported(&S, "OpenMP loop with dynamic schedule"); return; } // Emit outer loop. // // OpenMP [2.7.1, Loop Construct, Description, table 2-1] // When schedule(static, chunk_size) is specified, iterations are divided into // chunks of size chunk_size, and the chunks are assigned to the threads in // the team in a round-robin fashion in the order of the thread number. // // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { // while (idx <= UB) { BODY; ++idx; } // inner loop // LB = LB + ST; // UB = UB + ST; // } // const Expr *IVExpr = S.getIterationVariable(); const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); RT.EmitOMPForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB, UB, ST, Chunk); auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end"); // Start the loop with a block that tests the condition. auto CondBlock = createBasicBlock("omp.dispatch.cond"); EmitBlock(CondBlock); LoopStack.push(CondBlock); llvm::Value *BoolCondVal = nullptr; // UB = min(UB, GlobalUB) EmitIgnoredExpr(S.getEnsureUpperBound()); // IV = LB EmitIgnoredExpr(S.getInit()); // IV < UB BoolCondVal = EvaluateExprAsBool(S.getCond(false)); // If there are any cleanups between here and the loop-exit scope, // create a block to stage a loop exit along. auto ExitBlock = LoopExit.getBlock(); if (LoopScope.requiresCleanups()) ExitBlock = createBasicBlock("omp.dispatch.cleanup"); auto LoopBody = createBasicBlock("omp.dispatch.body"); Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); if (ExitBlock != LoopExit.getBlock()) { EmitBlock(ExitBlock); EmitBranchThroughCleanup(LoopExit); } EmitBlock(LoopBody); // Create a block for the increment. auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc"); BreakContinueStack.push_back(BreakContinue(LoopExit, Continue)); EmitOMPInnerLoop(S, LoopScope); EmitBlock(Continue.getBlock()); BreakContinueStack.pop_back(); // Emit "LB = LB + Stride", "UB = UB + Stride". EmitIgnoredExpr(S.getNextLowerBound()); EmitIgnoredExpr(S.getNextUpperBound()); EmitBranch(CondBlock); LoopStack.pop(); // Emit the fall-through block. EmitBlock(LoopExit.getBlock()); // Tell the runtime we are done. RT.EmitOMPForFinish(*this, S.getLocStart(), ScheduleKind); }