/// Merges two neighbouring parts of `vec` into a single part, using `BUF` as
/// scratch storage.
///
/// The left part is described by `parts[merge_num * step * 2]`; the right part
/// is the entry `step` positions further on in `parts`. The right part is
/// taken to begin exactly where the left part ends inside `vec` (its own
/// `start` field is not consulted). Both parts are expected to be sorted
/// already; the merge is stable, preferring the left element on ties.
///
/// @param vec        Vector holding the data; the merged range is written back
///                   in place.
/// @param merge_num  Index of this merge within the current pass.
/// @param step       Distance, in `parts` entries, between the two parts.
/// @param BUF        Scratch vector; only the positions matching the merged
///                   range in `vec` are written.
///
/// Side effect: the left entry's `partsize` grows by the right entry's
/// `partsize`. The right entry itself is left untouched.
void Merge(vector<int>& vec, const int& merge_num, const int& step, vector<int>& BUF) {
    const int left_index = merge_num * step * 2;
    const int right_index = left_index + step;

    // No right-hand partner exists for this merge at the current step.
    if (static_cast<std::size_t>(right_index) >= parts.size())
        return;

    const Iterator left_begin = vec.begin() + parts[left_index].start;
    const Iterator left_end = left_begin + parts[left_index].partsize;
    const Iterator right_end = left_end + parts[right_index].partsize;
    const Iterator scratch_begin = BUF.begin() + parts[left_index].start;

    // Merge into the scratch area, then copy the merged run back over the
    // two source parts.
    const Iterator scratch_end =
        std::merge(left_begin, left_end, left_end, right_end, scratch_begin);
    std::copy(scratch_begin, scratch_end, left_begin);

    parts[left_index].partsize += parts[right_index].partsize;
}
/// Sorts `vec` with a parallel radix sort built on TBB: the vector is sorted
/// chunk by chunk in parallel, then the chunks recorded in the file-level
/// `parts` container are merged pairwise in passes of doubling step.
///
/// @param vec       Vector to sort in place.
/// @param nthreads  Number of TBB threads to use. Any value <= 1 selects
///                  `task_scheduler_init::default_num_threads()`.
///
/// If there are fewer elements than threads (including an empty vector), the
/// work is handed to `RadixSort(vec)` instead.
///
/// NOTE(review): `parts` is not cleared here; presumably `RadixSort_Part`
/// fills it and something else resets it between calls — confirm.
void RadixSort_TBB(vector<int>& vec, int nthreads = 1) {
    task_scheduler_init init(task_scheduler_init::deferred);
    const int num_of_threads =
        nthreads > 1 ? nthreads : task_scheduler_init::default_num_threads();
    init.initialize(num_of_threads);

    const int num_of_numbers = static_cast<int>(vec.size());
    const int grainsize1 = num_of_numbers / num_of_threads;
    if (grainsize1 == 0) {
        // Too few elements to split across the threads.
        RadixSort(vec);
        return;
    }

    vector<int> buffer(num_of_numbers);

    // Phase 1: sort each chunk independently.
    parallel_for(blocked_range<int>(0, num_of_numbers, grainsize1),
        [&vec, &buffer](const blocked_range<int>& r) {
            RadixSort_Part(vec, r.begin(), r.end(), buffer);
        },
        auto_partitioner());

    // Order the recorded parts by their position in `vec` so that neighbours
    // in `parts` are neighbours in the data.
    sort(parts.begin(), parts.end(),
        [](const VecPart& vp1, const VecPart& vp2) {
            return vp1.start < vp2.start;
        });

    // Phase 2: pairwise merges with doubling step.
    const int parts_count = static_cast<int>(parts.size());
    for (int step = 1; step < parts_count; step *= 2) {
        parallel_for(blocked_range<int>(0, parts_count / 2),
            [&vec, &buffer, step](const blocked_range<int>& r) {
                // A sub-range handed out by auto_partitioner may hold more
                // than one index, so every merge in it must be performed,
                // not only the first.
                for (int merge_num = r.begin(); merge_num != r.end(); ++merge_num)
                    Merge(vec, merge_num, step, buffer);
            },
            auto_partitioner());
    }

    if (num_of_threads >= 1)
        init.terminate();
}
/// Runs `parallel_scan` over the full index range `[0, inputData.size())`
/// with a `ParallelScanSum` body built from `inputData`, and returns that
/// body by value once the scan has finished.
///
/// @param inputData  Values to scan; received by value, so the caller's
///                   container is copied on entry.
/// @return The `ParallelScanSum` body after the scan.
///
/// NOTE(review): if `ParallelScanSum` keeps a reference to `inputData`
/// rather than a copy, the returned object would refer to a destroyed
/// parameter — confirm against the class definition.
static ParallelScanSum pararell_scan(concurrent_vector<long long> inputData) {
    ParallelScanSum scan_body(inputData);

    const blocked_range<long long> whole_range(
        0, static_cast<long long>(inputData.size()));
    parallel_scan(whole_range, scan_body, auto_partitioner());

    return scan_body;
}