Beispiel #1
0
/**
 * Sorts `vec` in place using TBB: the input is cut into chunks that are
 * radix-sorted independently by RadixSort_Part (each call records its chunk
 * in the global `parts`), then the chunks are combined by rounds of pairwise
 * Merge calls with a doubling step.
 *
 * @param vec       Values to sort; modified in place.
 * @param nthreads  Requested number of TBB threads. Any value <= 1 (including
 *                  the default) selects task_scheduler_init::default_num_threads().
 *
 * When there are fewer elements than threads (this includes an empty vector)
 * the work is delegated to the serial RadixSort instead.
 */
void RadixSort_TBB(vector<int>& vec, int nthreads = 1)
{
	task_scheduler_init init(task_scheduler_init::deferred);

	const int num_of_threads = (nthreads > 1)
		? nthreads
		: task_scheduler_init::default_num_threads();
	init.initialize(num_of_threads);

	const int num_of_numbers = static_cast<int>(vec.size());
	const int grainsize1 = num_of_numbers / num_of_threads;

	if (grainsize1 == 0)
	{
		// Too little data to split; `init` is shut down by its destructor.
		RadixSort(vec);
		return;
	}

	vector<int> buffer(num_of_numbers);

	// Discard chunk records left over from a previous call; otherwise the
	// merge phase below would operate on stale entries.
	// NOTE(review): `parts` is defined outside this function - this assumes no
	// caller relies on its previous contents.
	parts.clear();

	// Phase 1: sort each chunk independently. The partitioner decides the
	// actual chunk boundaries, so they are read back from `parts` afterwards.
	parallel_for(blocked_range<int>(0, num_of_numbers, grainsize1),
		[&vec, &buffer](const blocked_range<int>& r)
	{
		RadixSort_Part(vec, r.begin(), r.end(), buffer);
	}, auto_partitioner()
		);

	// Chunks were registered in completion order; Merge needs them ordered by
	// their start offset.
	sort(parts.begin(), parts.end(), [](const VecPart& vp1, const VecPart& vp2)
	{
		return vp1.start < vp2.start;
	});

	const int parts_count = static_cast<int>(parts.size());

	// Phase 2: in each round, merge chunk k*2*step with chunk k*2*step + step.
	// Merge itself ignores indices that have no partner in the current round.
	for (int step = 1; step < parts_count; step *= 2)
	{
		parallel_for(blocked_range<int>(0, parts_count / 2),
			[&vec, &buffer, step](const blocked_range<int>& r)
		{
			// A subrange may contain more than one index, so every index in
			// it must be processed - not only r.begin().
			for (int merge_num = r.begin(); merge_num < r.end(); ++merge_num)
				Merge(vec, merge_num, step, buffer);
		}, auto_partitioner()
			);
	}

	init.terminate();
}
Beispiel #2
0
/**
 * Merges two neighbouring sorted runs of `vec` into one sorted run.
 *
 * The left run is described by parts[merge_num * step * 2]; the right run
 * takes its length from the entry `step` positions further on and is taken
 * to begin exactly where the left run ends (its own `start` is not read).
 * The merged data is staged in `BUF` at the same offsets and then copied
 * back into `vec`. Finally the left entry's partsize is grown to cover both
 * runs. Does nothing when the right entry's index is past the end of `parts`.
 *
 * @param vec        Vector holding both runs; updated in place.
 * @param merge_num  Ordinal of this merge within the current round.
 * @param step       Distance, in `parts` entries, between the two runs.
 * @param BUF        Scratch storage indexed like `vec`.
 */
void Merge(vector<int>& vec, const int& merge_num, const int& step, vector<int>& BUF)
{
	const int left_idx = merge_num * step * 2;

	// No partner run for this index in the current round.
	if (left_idx + step >= parts.size())
		return;

	const auto base = parts[left_idx].start;
	const auto left_end = base + parts[left_idx].partsize;
	const auto right_end = left_end + parts[left_idx + step].partsize;

	auto left = base;
	auto right = left_end;
	auto out = base;

	// Two-way merge; on ties the element from the left run goes first.
	while (left != left_end && right != right_end)
	{
		if (vec[right] < vec[left])
			BUF[out++] = vec[right++];
		else
			BUF[out++] = vec[left++];
	}

	// Drain whichever run still has elements.
	while (left != left_end)
		BUF[out++] = vec[left++];

	while (right != right_end)
		BUF[out++] = vec[right++];

	// Copy the merged span back over both runs.
	for (auto pos = base; pos < right_end; ++pos)
		vec[pos] = BUF[pos];

	// The left entry now describes the combined run.
	parts[left_idx].partsize += parts[left_idx + step].partsize;
}
Beispiel #3
0
/**
 * Radix-sorts the slice [begin, end) of `vec` and records that slice in the
 * global `parts`.
 *
 * Ten counting passes are run, one per value of the second argument given to
 * get_dig (0..9), each distributing elements over ten buckets through `BUF`.
 * Afterwards the slice is rewritten so that all negative values come first,
 * in reverse of their order after the passes, followed by the non-negative
 * values in their existing order.
 *
 * NOTE(review): get_dig is defined elsewhere; this presumably relies on it
 * returning a digit in 0..9 (also for negative inputs) - confirm.
 *
 * @param vec    Vector containing the slice; updated in place.
 * @param begin  Index of the first element of the slice.
 * @param end    Index one past the last element of the slice.
 * @param BUF    Scratch storage indexed like `vec`.
 */
void RadixSort_Part(vector<int>& vec, const int& begin, const int& end, vector<int>& BUF)
{
	const int length = end - begin;

	// Register the slice before anything else, even if it is trivial.
	VecPart slice(begin, length);
	parts.push_back(slice);

	if (length == 1) return;

	vector<int> offsets(10);

	for (int pass = 0; pass < 10; ++pass)
	{
		// Reset the per-bucket counters.
		for (int& slot : offsets)
			slot = 0;

		// Count how many elements fall into each bucket.
		for (int idx = begin; idx < end; ++idx)
			++offsets[get_dig(vec[idx], pass)];

		// Turn counts into starting offsets (exclusive running total).
		int running = 0;
		for (int& slot : offsets)
		{
			const int bucket_size = slot;
			slot = running;
			running += bucket_size;
		}

		// Place each element at the next free position of its bucket.
		for (int idx = begin; idx < end; ++idx)
		{
			int& slot = offsets[get_dig(vec[idx], pass)];
			BUF[begin + slot] = vec[idx];
			++slot;
		}

		// Bring the redistributed slice back into vec for the next pass.
		for (int idx = begin; idx < end; ++idx)
			vec[idx] = BUF[idx];
	}

	// BUF still mirrors the slice here. Emit negatives first, walking BUF
	// backwards, then the non-negative values walking forwards.
	int write_pos = begin;

	for (int read_pos = end - 1; read_pos >= begin; --read_pos)
	{
		if (BUF[read_pos] >= 0)
			continue;
		vec[write_pos++] = BUF[read_pos];
	}

	for (int read_pos = begin; read_pos < end; ++read_pos)
	{
		if (BUF[read_pos] < 0)
			continue;
		vec[write_pos++] = BUF[read_pos];
	}
}
Beispiel #4
0
 /**
  * Runs tbb::parallel_scan over the index range [0, inputData.size()) with a
  * ParallelScanSum body built from `inputData`, and returns that body.
  *
  * @param inputData  Values handed to the ParallelScanSum constructor
  *                   (received by value).
  * @return The body object after the scan has completed.
  */
 static ParallelScanSum pararell_scan(concurrent_vector<long long> inputData)
 {
   ParallelScanSum scan_body(inputData);

   // One index per element of the input.
   const blocked_range<long long> whole_input(0, inputData.size());

   parallel_scan(whole_input, scan_body, auto_partitioner());
   return scan_body;
 }