/**
 * Produces an identifier for the calling thread.
 *
 * The value depends on which execution space this functor was instantiated
 * with (only backends enabled at build time are considered):
 *   - Kokkos::Serial  : always 0
 *   - Kokkos::Threads : Kokkos::Threads::hardware_thread_id()
 *   - Kokkos::OpenMP  : Kokkos::OpenMP::hardware_thread_id()
 *   - anything else   : league_rank * team_size + team_rank of teamMember
 *
 * @param teamMember  team handle of the calling thread; only consulted by
 *                    the fallback path at the end.
 * @return the identifier described above.
 */
KOKKOS_INLINE_FUNCTION
size_t get_thread_id(const typename Kokkos::TeamPolicy<ExecutionSpace>::member_type & teamMember) const {

#if defined( KOKKOS_HAVE_SERIAL )
  if (Kokkos::Impl::is_same< Kokkos::Serial , ExecutionSpace >::value) {
    return 0;
  }
#endif

#if defined( KOKKOS_HAVE_PTHREAD )
  if (Kokkos::Impl::is_same< Kokkos::Threads , ExecutionSpace >::value) {
    return Kokkos::Threads::hardware_thread_id();
  }
#endif

#if defined( KOKKOS_HAVE_OPENMP )
  if (Kokkos::Impl::is_same< Kokkos::OpenMP, ExecutionSpace >::value) {
    return Kokkos::OpenMP::hardware_thread_id();
  }
#endif

  // No host backend matched: flatten (league, team) coordinates into one index.
  return teamMember.league_rank() * teamMember.team_size()
       + teamMember.team_rank();
}
/**
 * Team-level body that exercises the memory pool.
 *
 * Steps performed for the calling thread:
 *   1. Once per team, print the team size.
 *   2. Repeatedly call my_memory_pool.allocate_chunk(thread id) until a
 *      non-null chunk pointer is obtained.
 *   3. Overwrite the first 32 entries of the chunk 100 times (with the pass
 *      number), then overwrite them with -1, using ThreadVectorRange loops.
 *   4. Hand the chunk back through my_memory_pool.release_chunk().
 *
 * @param teamMember  team handle supplied by the Kokkos::TeamPolicy dispatch.
 */
KOKKOS_INLINE_FUNCTION
void operator()(const typename Kokkos::TeamPolicy<ExecutionSpace>::member_type & teamMember) const {

  Kokkos::single(Kokkos::PerTeam(teamMember), [=] () {
    printf("teamMember teamsize:%d\n", teamMember.team_size());
  });

  size_t thread_id = this->get_thread_id(teamMember);
  volatile idx * chunk = NULL;
  int attempts = 0;   // only consumed by the disabled diagnostic printf below

  // The pointer starts out null, so at least one allocation attempt is
  // always made; keep retrying until allocate_chunk yields a chunk.
  do {
    ++attempts;
    // The lambda's result is delivered through the trailing argument
    // (presumably broadcast to the thread's vector lanes -- see Kokkos::single).
    Kokkos::single(
        Kokkos::PerThread(teamMember),
        [&] (volatile idx * &memptr) {
          memptr = (volatile idx * )this->my_memory_pool.allocate_chunk(thread_id);
        },
        chunk);
  } while (chunk == NULL);

  // Touch the chunk repeatedly: each pass stores its pass number in 32 slots.
  for (int pass = 0; pass < 100; ++pass) {
    Kokkos::parallel_for(
        Kokkos::ThreadVectorRange(teamMember, 32),
        [&] (int lane) {
          chunk[lane] = pass;
        });
  }

  // Final sweep stamps the same 32 slots with -1.
  Kokkos::parallel_for(
      Kokkos::ThreadVectorRange(teamMember, 32),
      [&] (int lane) {
        chunk[lane] = -1;
      });

  Kokkos::single(Kokkos::PerThread(teamMember), [=] () {
    /*printf("me:%ld lr:%d ts:%d tr:%d, Had Memory location:%ld with chunk_index:%ld in this many tries:%d\n",
             thread_id,
             (int) teamMember.league_rank(),
             (int) teamMember.team_size(),
             (int) teamMember.team_rank(),
             chunk, my_memory_pool.get_chunk_index((idx *) chunk),
             attempts );*/
    this->my_memory_pool.release_chunk((idx *) chunk);
  });

  chunk = NULL;
}
/**
 * Team-level body that collects indices whose coordinate falls in [lo, hi].
 *
 * The index range [nfirst, nlast) is divided into league_size() contiguous
 * chunks (rounded up); each team handles one chunk and its threads walk the
 * chunk with a stride of team_size().
 *
 * Two passes over the same indices:
 *   1. count how many i satisfy lo <= x(i,dim) <= hi;
 *   2. after dev.team_scan(count, &nsend(0)) returns this thread's starting
 *      position, store the matching indices into sendlist(iswap, ...).
 *
 * The second pass is skipped entirely unless position + count is strictly
 * less than maxsendlist.
 * NOTE(review): team_scan is handed &nsend(0) as its global accumulator, so
 * nsend(0) presumably still grows when the store pass is skipped -- confirm
 * against the Kokkos team_scan documentation and the caller's resize logic.
 *
 * @param dev  team handle supplied by the Kokkos::TeamPolicy dispatch.
 */
KOKKOS_INLINE_FUNCTION
void operator() (typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const {
  // Ceiling division: number of indices assigned to each team.
  const int chunk = ((nlast - nfirst + dev.league_size() - 1 ) / dev.league_size());

  // Half-open range [teamstart, teamend) owned by this team, clamped to nlast.
  const int teamstart = chunk*dev.league_rank() + nfirst;
  const int unclamped_end = teamstart + chunk;
  const int teamend = unclamped_end < nlast ? unclamped_end : nlast;

  // Each thread visits first, first + stride, first + 2*stride, ...
  const int first  = teamstart + dev.team_rank();
  const int stride = dev.team_size();

  // Pass 1: count the entries this thread will contribute.
  int count = 0;
  for (int i = first; i < teamend; i += stride) {
    const bool inside = (x(i,dim) >= lo && x(i,dim) <= hi);
    if (inside) {
      ++count;
    }
  }

  // Obtain this thread's starting position for its entries.
  const int my_store_pos = dev.team_scan(count, &nsend(0));

  // Pass 2: store the indices, but only when they fit below maxsendlist.
  if (my_store_pos + count < maxsendlist) {
    int slot = my_store_pos;
    for (int i = first; i < teamend; i += stride) {
      const bool inside = (x(i,dim) >= lo && x(i,dim) <= hi);
      if (inside) {
        sendlist(iswap, slot) = i;
        ++slot;
      }
    }
  }
}