// Writes the symmetric difference of [first1, last1) and [first2, last2) to
// the range starting at result, running all device work on queue.
//
// The combined input is processed in tiles of tile_size elements:
//   1. a tiling kernel fills two arrays of per-tile boundaries, one for each
//      input range;
//   2. a per-tile kernel is handed a temporary buffer and a per-tile counts
//      array;
//   3. the counts are exclusive-scanned in place and a compaction kernel
//      moves the temporary buffer into result.
//
// Returns result advanced by the last element of the scanned counts.
//
// NOTE(review): the inputs are presumably required to be sorted, as with
// std::set_symmetric_difference - confirm against the kernels.
// NOTE(review): InputIterator1, InputIterator2 and OutputIterator are
// expected to be declared by a template header that is not visible here.
inline OutputIterator set_symmetric_difference(InputIterator1 first1,
                                               InputIterator1 last1,
                                               InputIterator2 first2,
                                               InputIterator2 last2,
                                               OutputIterator result,
                                               command_queue &queue = system::default_queue())
{
    typedef typename std::iterator_traits<InputIterator1>::value_type value_type;

    int tile_size = 1024;

    int size1 = detail::iterator_range_size(first1, last1);
    int size2 = detail::iterator_range_size(first2, last2);
    int combined_size = size1 + size2;

    // Number of tiles (rounded up) plus one extra slot for the closing bound.
    int bound_count = (combined_size + tile_size - 1) / tile_size + 1;

    vector<uint_> bounds_a(bound_count, queue.get_context());
    vector<uint_> bounds_b(bound_count, queue.get_context());

    // Partition both inputs into tiles. The kernel is pointed one past the
    // first slot of each bounds array; the first and last slots are written
    // explicitly on the host side (0 before the kernel runs, the full range
    // sizes after it).
    detail::balanced_path_kernel partition_kernel;
    partition_kernel.tile_size = tile_size;
    partition_kernel.set_range(first1, last1, first2, last2,
                               bounds_a.begin() + 1, bounds_b.begin() + 1);
    fill_n(bounds_a.begin(), 1, 0, queue);
    fill_n(bounds_b.begin(), 1, 0, queue);
    partition_kernel.exec(queue);

    fill_n(bounds_a.end() - 1, 1, size1, queue);
    fill_n(bounds_b.end() - 1, 1, size2, queue);

    // Scratch output sized for the combined input, plus the per-tile counts.
    // The last counts slot is zeroed before the per-tile kernel runs.
    vector<value_type> scratch(combined_size, queue.get_context());
    vector<uint_> tile_counts(bound_count, queue.get_context());
    fill_n(tile_counts.end() - 1, 1, 0, queue);

    // Run the per-tile symmetric difference.
    detail::serial_set_symmetric_difference_kernel tile_kernel;
    tile_kernel.tile_size = tile_size;
    tile_kernel.set_range(first1, first2,
                          bounds_a.begin(), bounds_a.end(),
                          bounds_b.begin(),
                          scratch.begin(), tile_counts.begin());
    tile_kernel.exec(queue);

    // Scan the counts in place before handing them to the compaction kernel.
    exclusive_scan(tile_counts.begin(), tile_counts.end(),
                   tile_counts.begin(), queue);

    // Pack the scratch buffer into the caller's output range.
    detail::compact_kernel packer;
    packer.tile_size = tile_size;
    packer.set_range(scratch.begin(),
                     tile_counts.begin(), tile_counts.end(),
                     result);
    packer.exec(queue);

    // The last scanned count is read back from the device and used as the
    // output length.
    return result + (tile_counts.end() - 1).read(queue);
}
// Merges [first1, last1) and [first2, last2) into the range starting at
// result, using comp as the ordering and running all device work on queue.
//
// The combined input is processed in tiles of tile_size elements: a tiling
// kernel fills two arrays of per-tile boundaries (one per input range), and
// a merge kernel is then given those boundaries together with result.
//
// Returns result advanced by the combined size of both input ranges.
//
// NOTE(review): both inputs are presumably required to be sorted with
// respect to comp, as with std::merge - confirm against the kernels.
// NOTE(review): InputIterator1, InputIterator2, OutputIterator and Compare
// are expected to be declared by a template header that is not visible here.
inline OutputIterator merge_with_merge_path(InputIterator1 first1,
                                            InputIterator1 last1,
                                            InputIterator2 first2,
                                            InputIterator2 last2,
                                            OutputIterator result,
                                            Compare comp,
                                            command_queue &queue = system::default_queue())
{
    typedef typename
        std::iterator_traits<OutputIterator>::difference_type result_difference_type;

    // Single source of truth for the tile size: the bounds arrays below and
    // both kernels must agree on it, so none of them hard-codes the value.
    int tile_size = 1024;

    int count1 = iterator_range_size(first1, last1);
    int count2 = iterator_range_size(first2, last2);

    // Number of tiles (rounded up) plus one extra slot for the closing bound.
    int bound_count = (count1 + count2 + tile_size - 1) / tile_size + 1;

    vector<uint_> tile_a(bound_count, queue.get_context());
    vector<uint_> tile_b(bound_count, queue.get_context());

    // Tile the sets. The kernel is pointed one past the first slot of each
    // bounds array; the first and last slots are written explicitly on the
    // host side (0 before the kernel runs, the full range sizes after it).
    merge_path_kernel tiling_kernel;
    tiling_kernel.tile_size = tile_size;
    tiling_kernel.set_range(first1, last1, first2, last2,
                            tile_a.begin() + 1, tile_b.begin() + 1, comp);
    fill_n(tile_a.begin(), 1, 0, queue);
    fill_n(tile_b.begin(), 1, 0, queue);
    tiling_kernel.exec(queue);

    fill_n(tile_a.end() - 1, 1, count1, queue);
    fill_n(tile_b.end() - 1, 1, count2, queue);

    // Merge the tiles into result.
    serial_merge_kernel merge_kernel;
    merge_kernel.tile_size = tile_size;
    merge_kernel.set_range(first1, first2,
                           tile_a.begin(), tile_a.end(),
                           tile_b.begin(),
                           result, comp);
    merge_kernel.exec(queue);

    return result + static_cast<result_difference_type>(count1 + count2);
}
inline bool includes(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, command_queue &queue = system::default_queue()) { typedef typename std::iterator_traits<InputIterator1>::value_type value_type; int tile_size = 1024; int count1 = detail::iterator_range_size(first1, last1); int count2 = detail::iterator_range_size(first2, last2); vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context()); // Tile the sets detail::tile_sets_kernel tiling_kernel; tiling_kernel.tile_size = tile_size; tiling_kernel.set_range(first1, last1, first2, last2, tile_a.begin()+1, tile_b.begin()+1); fill_n(tile_a.begin(), 1, 0, queue); fill_n(tile_b.begin(), 1, 0, queue); tiling_kernel.exec(queue); fill_n(tile_a.end()-1, 1, count1, queue); fill_n(tile_b.end()-1, 1, count2, queue); vector<uint_> result((count1+count2+tile_size-1)/tile_size, queue.get_context()); // Find individually detail::serial_includes_kernel includes_kernel; includes_kernel.tile_size = tile_size; includes_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(), tile_b.begin(), result.begin()); includes_kernel.exec(queue); return find(result.begin(), result.end(), 0, queue) == result.end(); }