// Team functor body for the partitioned-view kernel.
//
// The league rank selects the element (first index of dev_x / dev_y).  A
// Partition is built from this thread's team rank and the team size and is
// used to construct local views of dev_x and dev_y, on which
// simple_function is evaluated.  When `print` is set, the team's rank-0
// thread dumps the element's x and y rows, and the very first thread of the
// league additionally prints a one-line configuration summary.
KOKKOS_INLINE_FUNCTION
void operator() (device_type device) const
{
  typename local_view_type::Partition my_part( device.team_rank() ,
                                               device.team_size() );
  const local_view_type x_part( dev_x , my_part );
  const local_view_type y_part( dev_y , my_part );

  const int elem = device.league_rank();

  // Apply the evaluation function to this thread's fixed-size UQ sample set.
  simple_function< value_type >( x_part(elem) , y_part(elem) );

  // Everything below is diagnostic output only.
  if ( ! print )
    return;

  // Barrier before printing; presumably so every team member has finished
  // its evaluation before the rows are read back (confirm).
  device.team_barrier();

  const bool team_lead = ( device.team_rank() == 0 );

  // Configuration summary: printed once, by team rank 0 of league rank 0.
  if ( team_lead && device.league_rank() == 0 ) {
    printf("view_kernel league(%d:%d) team_size(%d) dim(%d) size(%d)\n",
           device.league_rank(),
           device.league_size(),
           device.team_size(),
           static_cast<int>( dev_x.dimension_1() ),
           static_cast<int>( x_part(elem).size() ) );
  }

  // One thread per team prints the full x and y rows of this element.
  if ( team_lead ) {
    const int x_extent = static_cast<int>( dev_x.dimension_1() );
    printf("x(%i) = { ",elem);
    for (int s = 0; s < x_extent; ++s) {
      printf("%g ", dev_x(elem,s));
    }
    printf("}\n\n");

    const int y_extent = static_cast<int>( dev_y.dimension_1() );
    printf("y(%i) = { ",elem);
    for (int s = 0; s < y_extent; ++s) {
      printf("%g ", dev_y(elem,s));
    }
    printf("}\n\n");
  }

  device.team_barrier();
}
// Team functor body for the vector kernel that wraps view memory directly.
//
// The league rank selects the element; each team thread builds storage
// objects from the address of dev_x/dev_y at (element, thread) together with
// a per-thread sample count and the team size, wraps them in
// array_vector_type, and evaluates simple_function on the result.
// NOTE(review): the per-thread count is an integer division of the sample
// count by the team size; presumably the sample count is expected to be a
// multiple of the team size -- confirm against the caller.
KOKKOS_INLINE_FUNCTION
void operator() (device_type device) const
{
  const int elem          = device.league_rank();
  const int team_sz       = device.team_size();
  const int my_rank       = device.team_rank();
  const int total_samples = dev_x.dimension_1();
  const int local_count   = total_samples / team_sz;

  // Storage built from (pointer, count, team size); presumably the last
  // argument is the stride between consecutive samples -- confirm.
  storage_type x_store(&dev_x(elem,my_rank), local_count, team_sz);
  storage_type y_store(&dev_y(elem,my_rank), local_count, team_sz);
  array_vector_type x(x_store), y(y_store);

  simple_function<scalar_vector_type>(x,y);

  // The remainder is diagnostic output only.
  if (!print)
    return;

  // Threads take turns in rank order; every thread hits the barrier once per
  // turn so that only one thread prints between consecutive barriers.
  int turn = 0;
  while (turn < team_sz) {
    if (my_rank == turn) {
      printf("x(%i) = [ ",turn);
      for (int s = 0; s < local_count; ++s) {
        printf("%g ", x.coeff(s));
      }
      printf("]\n\n");
    }
    device.team_barrier();
    ++turn;
  }

  turn = 0;
  while (turn < team_sz) {
    if (my_rank == turn) {
      printf("y(%i) = [ ",turn);
      for (int s = 0; s < local_count; ++s) {
        printf("%g ", y.coeff(s));
      }
      printf("]\n\n");
    }
    device.team_barrier();
    ++turn;
  }
}
// Team functor body for the scalar kernel.
//
// The league rank selects the element.  Within a team, the thread with rank
// r processes samples r, r + team_size, r + 2*team_size, ... of that
// element: each sample is read from dev_x, passed through simple_function,
// and the result is written to the matching entry of dev_y.
KOKKOS_INLINE_FUNCTION
void operator() (device_type device) const
{
  const int elem          = device.league_rank();
  const int stride        = device.team_size();
  const int total_samples = dev_x.dimension_1();

  // Input / output scalars are declared once and reused for every sample.
  scalar_type x, y;

  int s = device.team_rank();
  while (s < total_samples) {
    // Load input sample
    x = dev_x(elem, s);

    // Evaluate
    simple_function<scalar_type>(x,y);

    // Store result
    dev_y(elem, s) = y;

    s += stride;
  }
}
// Team functor body for the multi-point vector kernel.
//
// The league rank selects the element.  Each team thread owns
// (sample count / team size) samples of that element and evaluates
// simple_function on vectors x and y of that length.  Two data paths exist:
//   * `reset` set and storage_type::supports_reset true: shallowReset() is
//     called on the vectors' storage with the address of dev_x/dev_y at
//     (element, thread), the per-thread count and the team size; nothing is
//     copied in or out afterwards (presumably the vectors then alias the
//     view memory -- confirm).
//   * otherwise: x is filled from dev_x at indices thread + i*team_size, and
//     after evaluation y is copied back to dev_y at the same indices.
// When `print` is set, threads print their x and then y coefficients in
// team-rank order.
KOKKOS_INLINE_FUNCTION
void operator() (device_type device) const
{
  const int elem          = device.league_rank();
  const int team_sz       = device.team_size();
  const int my_rank       = device.team_rank();
  const int total_samples = dev_x.dimension_1();
  const int local_count   = total_samples / team_sz;

  // True when the vectors are redirected via shallowReset instead of being
  // filled by explicit copies.
  const bool use_shallow_reset = reset && storage_type::supports_reset;

  // Multi-point expansions, zero-initialised.
  array_vector_type x(local_count, 0.0), y(local_count, 0.0);

  // Set up the input.
  if (use_shallow_reset) {
    storage_type& x_store = x.storage();
    storage_type& y_store = y.storage();
    // Final `false` argument: presumably "storage does not own the memory"
    // -- confirm against storage_type.
    x_store.shallowReset(&dev_x(elem,my_rank), local_count, team_sz, false);
    y_store.shallowReset(&dev_y(elem,my_rank), local_count, team_sz, false);
  }
  else {
    for (int s = 0; s < local_count; ++s) {
      x.fastAccessCoeff(s) = dev_x(elem, my_rank + s*team_sz);
    }
  }

  simple_function<scalar_vector_type>(x,y);

  // Optional diagnostics: threads take turns in rank order, with a team
  // barrier after every turn so only one thread prints at a time.
  if (print) {
    int turn = 0;
    while (turn < team_sz) {
      if (my_rank == turn) {
        printf("x(%i) = [ ",turn);
        for (int s = 0; s < local_count; ++s) {
          printf("%g ", x.coeff(s));
        }
        printf("]\n\n");
      }
      device.team_barrier();
      ++turn;
    }

    turn = 0;
    while (turn < team_sz) {
      if (my_rank == turn) {
        printf("y(%i) = [ ",turn);
        for (int s = 0; s < local_count; ++s) {
          printf("%g ", y.coeff(s));
        }
        printf("]\n\n");
      }
      device.team_barrier();
      ++turn;
    }
  }

  // Write the result back only on the copy path.
  if (!use_shallow_reset) {
    for (int s = 0; s < local_count; ++s) {
      dev_y(elem, my_rank + s*team_sz) = y.fastAccessCoeff(s);
    }
  }
}