void myQuickSort(std::vector < T > &myVec, int q, int r, const int switchThresh) { T pivot; int i, j; /* done with this part of the vector? -> exit function */ if (q >= r) return; /* is the partition to be processed smaller than a certain threshhold? * -> then use insertion sort and exit function afterwards */ if (r - q < switchThresh) { myInsertSort(myVec, q, r); return; } /* now actually sort our partition */ /* choose pivot, initialize borders */ pivot = myVec[r]; i = q - 1; j = r; /* partition step, which moves smaller numbers to the left * and larger numbers to the right of the pivot */ while (true) { while (myVec[++i] < pivot); while (myVec[--j] > pivot); if (i >= j) break; std::swap(myVec[i], myVec[j]); } std::swap(myVec[i], myVec[r]); /* recursively call yourself with new subpartitions, * i is index of pivot * each recursive function call is marked as a task, making parallel * processing of them possible. * note that this is only possible, because all partitions can be * processed independently of each other. */ # pragma intel omp taskq { # pragma intel omp task { myQuickSort(myVec, q, i - 1, switchThresh); } # pragma intel omp task { myQuickSort(myVec, i + 1, r, switchThresh); } } }
void myQuickSort(std::vector < T > &myVec, int q, int r, const int switchThresh, std::stack < std::pair < int, int > >&globalTodoStack, int &numBusyThreads, const int numThreads, std::vector < int >&globalStackWrite) { T pivot; int i, j; /* this pair consists of the new q and r values */ std::pair < int, int >myBorder; /* this variable indicates, whether the present thread does useful work atm */ bool idle = true; /* only thread number 0 does useful work in the beginning */ if (q != r) idle = false; while (true) { /* is the partition to be processed smaller than a certain threshhold? * -> then use insertion sort */ if (r - q < switchThresh) { myInsertSort(myVec, q, r); /* and mark the region as sorted, by setting q to r, which makes * the thread run into the next while loop, where it requests * new work */ q = r; } /* are we done with this part of the vector? * -> then pop another one off the todo-Stack and process it */ while (q >= r) { /* only one thread at the time should access the todo-Stack and * the numBusyThreads and idle variables */ # pragma omp critical { /* something left on the global stack to do? */ if (false == globalTodoStack.empty()) { if (true == idle) ++numBusyThreads; idle = false; myBorder = globalTodoStack.top(); globalTodoStack.pop(); q = myBorder.first; r = myBorder.second; globalStackWrite[omp_get_thread_num()]++; /* nothing left to do on the stack */ } else { if (false == idle) --numBusyThreads; idle = true; /* busy wait here (not optimal) */ } } /* end critical section */ /* if all threads are done, break out of this function * note, that the value of numBusyThreads is current, as there * is a flush implied at the end of the last critical section */ if (numBusyThreads == 0) { return; } } /* end while ( q >= r ) */ /* now actually sort our partition */ /* choose pivot, initialize borders */ pivot = myVec[r]; i = q - 1; j = r; /* partition step, which moves smaller numbers to the left * and larger numbers to the right of the pivot */ while (true) { while (myVec[++i] < pivot); while (myVec[--j] > pivot); if (i >= j) break; std::swap(myVec[i], myVec[j]); } std::swap(myVec[i], myVec[r]); /* only push on the stack, if there is enough left to do */ if (i - 1 - q > switchThresh) { myBorder = std::make_pair(q, i - 1); # pragma omp critical { globalTodoStack.push(myBorder); globalStackWrite[omp_get_thread_num()]++; } } else { /* for small partitions use insertion sort */ myInsertSort(myVec, q, i - 1); } q = i + 1; /* r stays the same for the next iteration */ } }