// Applies `f` to every element of [first, last), splitting the range into
// contiguous blocks. All blocks except the last run on freshly spawned
// threads; the last block runs on the calling thread.
//
// first, last  the range to process; an empty range returns immediately.
// f            callable applied to each element (copied into every task).
//
// Each worker's block is wrapped in a std::packaged_task, so an exception
// thrown by `f` on a worker is rethrown here when its future is collected.
void parallel_for_each_v1(Iterator first, Iterator last, Func f)
{
    const unsigned long total = std::distance(first, last);
    if (total == 0)
    {
        return;
    }

    // At least 25 elements per thread, and never more threads than the
    // hardware reports (falling back to 2 when it reports 0).
    const unsigned long per_thread_floor = 25;
    const unsigned long thread_cap = (total + per_thread_floor - 1) / per_thread_floor;
    const unsigned long hw_hint = std::thread::hardware_concurrency();
    const unsigned long hw_or_default = (hw_hint != 0) ? hw_hint : 2;
    const unsigned long thread_count = std::min(hw_or_default, thread_cap);
    const unsigned long chunk = total / thread_count;
    const unsigned long worker_count = thread_count - 1;

    // Declaration order matters: `joiner` is destroyed first, then the
    // threads, then the futures.
    // NOTE(review): join_threads is declared elsewhere; presumably it joins
    // every joinable thread in `workers` on destruction — confirm.
    std::vector<std::future<void> > pending(worker_count);
    std::vector<std::thread> workers(worker_count);
    join_threads joiner(workers);

    Iterator chunk_begin = first;
    for (unsigned long w = 0; w != worker_count; ++w)
    {
        Iterator chunk_end = chunk_begin;
        std::advance(chunk_end, chunk);

        std::packaged_task<void(void)> task(
            [chunk_begin, chunk_end, f] { std::for_each(chunk_begin, chunk_end, f); });
        pending[w] = task.get_future();
        workers[w] = std::thread(std::move(task));

        chunk_begin = chunk_end;
    }

    // The calling thread handles whatever is left (the last block plus any
    // remainder from the integer division).
    std::for_each(chunk_begin, last, f);

    // Collect results in spawn order; get() rethrows a worker's exception.
    for (std::future<void>& outcome : pending)
    {
        outcome.get();
    }
}
// Computes the inclusive running sum of [first, last) in place using one
// thread per element and a double-buffered, stride-doubling scan.
//
// first, last  the range to transform; `first + i` must be valid, so the
//              iterator has to support random access. Ranges of length 0 or
//              1 are left untouched.
//
// At step s (stride 2^s) element i adds the value of element i - stride from
// the array written in the previous step. Even steps read the input range and
// write `buffer`; odd steps read `buffer` and write the input range.
//
// NOTE(review): correctness relies on `barrier` (declared elsewhere) making
// wait() block until every still-participating thread has arrived, and on
// done_waiting() removing the caller from the participant set — confirm
// against the barrier definition.
void parallel_partial_sum(Iterator first,Iterator last)
{
    typedef typename Iterator::value_type value_type;

    struct process_element
    {
        // Runs the scan for element `i`. `last` is unused but kept so the
        // call signature stays the same.
        void operator()(Iterator first,Iterator last,
                        std::vector<value_type>& buffer,
                        unsigned i,barrier& b)
        {
            value_type& ith_element=*(first+i);
            // True when the most recent step wrote into `buffer` rather than
            // into the input range.
            bool update_source=false;
            for(unsigned step=0,stride=1;stride<=i;++step,stride*=2)
            {
                value_type const& source=(step%2)?
                    buffer[i]:ith_element;
                value_type& dest=(step%2)?
                    ith_element:buffer[i];
                value_type const& addend=(step%2)?
                    buffer[i-stride]:*(first+i-stride);
                dest=source+addend;
                update_source=!(step%2);
                b.wait();
            }
            // This element is finished, but elements further right will
            // still read it as an addend, alternately from the input range
            // and from `buffer`. Mirror the final value into whichever array
            // does not hold it yet so both copies are valid. Without the
            // else-branch, element 0 never populates buffer[0] and elements
            // whose last step was odd leave a stale value in buffer[i].
            // Nobody reads the slot written here until two barriers later,
            // and the next barrier cannot complete before done_waiting().
            if(update_source)
            {
                ith_element=buffer[i];
            }
            else
            {
                buffer[i]=ith_element;
            }
            b.done_waiting();
        }
    };

    unsigned long const length=std::distance(first,last);
    if(length<=1)
        return;

    std::vector<value_type> buffer(length);
    barrier b(length);

    // One spawned thread per element except the last, which is processed on
    // the calling thread.
    // NOTE(review): join_threads is declared elsewhere; presumably it joins
    // the threads on destruction — confirm.
    std::vector<std::thread> threads(length-1);
    join_threads joiner(threads);

    for(unsigned long i=0;i<(length-1);++i)
    {
        threads[i]=std::thread(process_element(),first,last,
                               std::ref(buffer),i,std::ref(b));
    }
    process_element()(first,last,buffer,length-1,b);
}
// Checks an ostream_joiner built from delimiter `d`.
//
// Compile time: the joiner must be copy/move constructible and assignable
// exactly when the decayed delimiter type is.
// Run time: writing [first, last) through the joiner into a string stream
// must produce `expected`.
void test (Delim &&d, Iter first, Iter last, const CharT *expected )
{
    using DelimT = typename std::decay<Delim>::type;
    using Joiner = exp::ostream_joiner<DelimT, CharT, Traits>;

    static_assert(std::is_copy_constructible<Joiner>::value ==
                  std::is_copy_constructible<DelimT>::value, "");
    static_assert(std::is_move_constructible<Joiner>::value ==
                  std::is_move_constructible<DelimT>::value, "");
    static_assert(std::is_copy_assignable<Joiner>::value ==
                  std::is_copy_assignable<DelimT>::value, "");
    static_assert(std::is_move_assignable<Joiner>::value ==
                  std::is_move_assignable<DelimT>::value, "");

    std::basic_stringstream<CharT, Traits> sstream;
    Joiner joiner(sstream, d);

    // Post-increment and dereference-assign are used on purpose: they are
    // the joiner operations this test exercises.
    while (first != last)
    {
        *joiner++ = *first++;
    }

    assert(sstream.str() == expected);
}
// For every window area in [areaBegin, areaEnd), reduces over the context's
// font patterns with a FontScanJoiner and appends the joiner's value to
// `resp.results`. Sets `resp.status` to 0 once all areas are processed.
//
// resp       receives one result per area, in iteration order, and the status.
// areaBegin  first area to scan.
// areaEnd    one past the last area to scan.
// image      passed through to each FontScanJoiner.
// context    supplies the font patterns and is passed to each FontScanJoiner.
void FontScanner::Scan(FontCenterScanResponse& resp
    , WindowAreas::const_iterator areaBegin
    , WindowAreas::const_iterator areaEnd
    , CImg<uint8_t>& image
    , FontScanContext& context)
{
    typedef tbb::blocked_range<FontPatterns::const_iterator> PatternRange;

    WindowAreas::const_iterator area = areaBegin;
    while (area != areaEnd)
    {
        // A fresh joiner per area; it is the body of the reduction.
        FontScanJoiner joiner(*area, image, context);

        // Fetched after the joiner is constructed, once per area, exactly as
        // the loop has always done.
        auto& patterns = context.GetFontPatterns();
        const PatternRange everyPattern(patterns.begin(), patterns.end());
        tbb::parallel_reduce(everyPattern, joiner);

        resp.results.push_back(joiner.GetValue());
        ++area;
    }

    resp.status = 0;
}
// Searches [first, last) for the first element found that compares equal to
// `match`, splitting the range into blocks scanned by separate threads plus
// the calling thread.
//
// Returns an iterator to a matching element, or `last` if no thread found a
// match and none reported an exception. With several matches in different
// blocks, which one is returned depends on thread timing. If a comparison or
// iterator operation throws on any thread, the exception is stored in the
// shared promise and rethrown from this function.
Iterator parallel_find(Iterator first, Iterator last, MatchType match)
{
    struct find_element
    {
        // Scans [begin, end) and publishes the first hit (or an exception)
        // through `result`, raising `done_flag` so other threads stop early.
        void operator()(Iterator begin, Iterator end, MatchType match,
                        std::promise<Iterator>* result,
                        std::atomic<bool>* done_flag)
        {
            try
            {
                // Re-check done_flag on every iteration so this thread stops
                // as soon as another one has found a match or failed.
                for(; (begin != end) && !done_flag->load(); ++begin)
                {
                    if(*begin == match)
                    {
                        result->set_value(begin);
                        done_flag->store(true);
                        return;
                    }
                }
            }
            catch(...)
            {
                try
                {
                    result->set_exception(std::current_exception());
                    done_flag->store(true);
                }
                catch(...)
                {
                    // Deliberately ignored: the promise could not take the
                    // exception (for example it already holds a result from
                    // another thread).
                }
            }
        }
    };

    const unsigned long length = std::distance(first, last);
    if(!length)
        return last;

    // At least 25 elements per thread, and never more threads than the
    // hardware reports (falling back to 2 when it reports 0).
    const unsigned long min_per_thread = 25;
    const unsigned long max_threads = (length + min_per_thread - 1) / min_per_thread;
    const unsigned long hardware_threads = std::thread::hardware_concurrency();
    const unsigned long num_threads =
        std::min(hardware_threads != 0 ? hardware_threads : 2, max_threads);
    const unsigned long block_size = length / num_threads;

    std::promise<Iterator> result;
    std::atomic<bool> done_flag(false);
    std::vector<std::thread> threads(num_threads - 1);
    {
        // Scoped so that `joiner` is destroyed before the flag is inspected.
        // NOTE(review): join_threads is declared elsewhere; presumably it
        // joins every thread on destruction — confirm.
        join_threads joiner(threads);
        Iterator block_start = first;
        for(unsigned long i = 0; i < (num_threads - 1); ++i)
        {
            Iterator block_end = block_start;
            std::advance(block_end, block_size);
            threads[i] = std::thread(find_element(), block_start, block_end,
                                     match, &result, &done_flag);
            block_start = block_end;
        }
        // The calling thread scans the final block, including any remainder.
        find_element()(block_start, last, match, &result, &done_flag);
    }

    if(!done_flag.load())
    {
        return last;
    }
    return result.get_future().get();
}
// Computes the inclusive running sum of [first, last) in place by splitting
// the range into chunks. Each chunk is summed independently, then offset by
// the final value of the preceding chunk, which is passed from chunk to
// chunk through a promise/future pair.
//
// first, last  the range to transform; an empty range returns immediately.
//
// An exception thrown in a spawned chunk is stored in that chunk's promise,
// so it surfaces in the next chunk's get() and is forwarded down the chain.
// The final chunk runs on the calling thread and rethrows.
void parallel_partial_sum(Iterator first, Iterator last)
{
    typedef typename Iterator::value_type value_type;

    struct process_chunk
    {
        // Sums the closed range [begin, last] in place.
        //   previous_end_value  final value of the preceding chunk, or null
        //                       for the first chunk.
        //   end_value           receives this chunk's final value, or null
        //                       for the last chunk.
        void operator()(Iterator begin, Iterator last,
                        std::future<value_type>* previous_end_value,
                        std::promise<value_type>* end_value)
        {
            try
            {
                Iterator end = last;
                ++end;
                std::partial_sum(begin, end, begin);
                if (previous_end_value)
                {
                    // future::get() returns by value, so hold a copy; a
                    // non-const lvalue reference cannot bind to it.
                    value_type const addend = previous_end_value->get();
                    // Fix up and publish the last element first so the next
                    // chunk can proceed while the rest is updated.
                    *last += addend;
                    if (end_value)
                    {
                        end_value->set_value(*last);
                    }
                    std::for_each(begin, last, [addend](value_type& item)
                    {
                        item += addend;
                    });
                }
                else if (end_value)
                {
                    end_value->set_value(*last);
                }
            }
            catch (...)
            {
                if (end_value)
                {
                    end_value->set_exception(std::current_exception());
                }
                else
                {
                    // Last chunk: no promise to carry the error.
                    throw;
                }
            }
        }
    };

    unsigned long const length = std::distance(first, last);
    if (!length)
    {
        return;
    }

    // At least 25 elements per thread, and never more threads than the
    // hardware reports (falling back to 2 when it reports 0).
    unsigned long const min_per_thread = 25;
    unsigned long const max_threads = (length + min_per_thread - 1) / min_per_thread;
    unsigned long const hardware_threads = std::thread::hardware_concurrency();
    unsigned long const num_threads =
        std::min(hardware_threads != 0 ? hardware_threads : 2, max_threads);
    unsigned long const block_size = length / num_threads;

    std::vector<std::thread> threads(num_threads - 1);
    std::vector<std::promise<value_type> > end_values(num_threads - 1);
    std::vector<std::future<value_type> > previous_end_values;
    // Reserve up front: running threads hold pointers into this vector, so
    // it must never reallocate.
    previous_end_values.reserve(num_threads - 1);
    // NOTE(review): join_threads is declared elsewhere; presumably it joins
    // every thread on destruction — confirm.
    join_threads joiner(threads);

    Iterator block_start = first;
    for (unsigned long i = 0; i < (num_threads - 1); ++i)
    {
        // block_last is the last element of the chunk, not one past it.
        Iterator block_last = block_start;
        std::advance(block_last, block_size - 1);
        threads[i] = std::thread(process_chunk(), block_start, block_last,
                                 (i != 0) ? &previous_end_values[i - 1] : nullptr,
                                 &end_values[i]);
        block_start = block_last;
        ++block_start;
        previous_end_values.push_back(end_values[i].get_future());
    }

    // The calling thread processes the final chunk, including any remainder.
    Iterator final_element = block_start;
    std::advance(final_element, std::distance(block_start, last) - 1);
    process_chunk()(block_start, final_element,
                    (num_threads > 1) ? &previous_end_values.back() : nullptr,
                    nullptr);
}