/// Team-level functor body: updates one tile of `_u(_tp1, ...)` from `_u(_t, ...)`.
///
/// The tile is selected by the team's league rank. The team first copies a
/// (_tileSize + 2) x (_tileSize + 2) patch of `_u(_t, ...)` into team scratch
/// memory, synchronises, and then each thread updates the single cell that
/// its team rank maps to, using the cell's value and its four direct
/// neighbours from the scratch patch.
///
/// NOTE(review): presumably launched with exactly _tileSize * _tileSize
/// threads per team; a larger team would index past the patch -- confirm
/// against the launch site.
///
/// @param teamMember  Handle identifying the calling thread within its team.
KOKKOS_INLINE_FUNCTION
void operator()(const member_type & teamMember) const {
  using ScratchPatch = Kokkos::View< double** ,
                                     Kokkos::LayoutRight ,
                                     execution_space::scratch_memory_space ,
                                     Kokkos::MemoryUnmanaged >;

  // Edge length of the scratch patch: the tile plus one extra row/column
  // on each side.
  const unsigned int patchExtent = _tileSize + 2;
  ScratchPatch patch(teamMember.team_shmem(), patchExtent, patchExtent);

  // Which tile this team owns, and where its patch starts in _u.
  const unsigned int tileId    = teamMember.league_rank();
  const unsigned int tileRow   = tileId / _numberOfTilesPerSide;
  const unsigned int tileCol   = tileId % _numberOfTilesPerSide;
  const unsigned int rowOrigin = tileRow * _tileSize;
  const unsigned int colOrigin = tileCol * _tileSize;

  // Cooperative load: the flattened patch index range is split across the
  // team's threads.
  Kokkos::parallel_for
    (Kokkos::TeamThreadRange(teamMember, patchExtent * patchExtent),
     [=] (const unsigned int flat) {
      const unsigned int r = flat / patchExtent;
      const unsigned int c = flat % patchExtent;
      patch(r, c) = _u(_t, rowOrigin + r, colOrigin + c);
    });

  // Every thread must see the fully populated patch before reading it.
  teamMember.team_barrier();

  // Position of this thread's cell inside the patch (offset by one to skip
  // the extra border row/column).
  const auto threadRank = teamMember.team_rank();
  const unsigned int patchRow = threadRank / _tileSize + 1;
  const unsigned int patchCol = threadRank % _tileSize + 1;

  // The same cell expressed as indices into _u.
  const unsigned int row = tileRow * _tileSize + patchRow;
  const unsigned int col = tileCol * _tileSize + patchCol;

  // Stencil update. The previous content of _u(_tp1, row, col) is read on
  // the right-hand side before being overwritten.
  const double centre = patch(patchRow, patchCol);
  const double neighbourSum =
    patch(patchRow + 1, patchCol) +
    patch(patchRow - 1, patchCol) +
    patch(patchRow, patchCol + 1) +
    patch(patchRow, patchCol - 1);
  _u(_tp1, row, col) =
    (2 - 4 * _courant2) * centre - _u(_tp1, row, col) +
    _courant2 * neighbourSum;
}
// task team interface void apply(const member_type &member, value_type &r_val) { for (int iter=0;iter<BIG;++iter) { Kokkos::parallel_for(Kokkos::TeamThreadRange(member, SMALL), [&](const long i) { double tmp = 0.0; for (long j=0;j<TINY;++j) tmp += j; _dummy[i] += (tmp + 1); }); if (_use_barrier) member.team_barrier(); } }
// task team interface KOKKOS_INLINE_FUNCTION void apply(const member_type &member, value_type &r_val) { const int offset = _itask*SMALL; auto a = &_a[offset]; auto b = &_b[offset]; for (int iter=0;iter<BIG;++iter) { Kokkos::parallel_for(Kokkos::TeamThreadRange(member, SMALL), [&](const int i) { ValueType tmp = 0.0; for (int j=0;j<TINY;++j) tmp += j; a[i] = b[i] + (tmp + 1); }); // For testing member.team_barrier(); } }