/* * Implements the partition selection algorithm with randomized selection * * From: http://en.wikipedia.org/wiki/Selection_algorithm#Linear_general_selection_algorithm_-_.22Median_of_Medians_algorithm.22 * * Arguments: * char **lists: A list of lists, the first of which contains the values used for pivots * the 2nd and further lists will be pivoted alongside the first. * A common usage would be to have the first list point to an array * of values, then the second would point to another char ** list of * strings. The second list would have it's pointer values moved * around as part of the pivots, and the index location where the * partition value (say for the median) occurs would allow a reference * to the associated strings in the second list. * size_t nlists the number of lists * size_t *widths An array of widths, one for each list * int left,right The left and right boundary of the list to be pivoted * int pivotIndex The index around which to pivot the list. A common use-case is * to choose pivotIndex = listLength/2, then the pivot will provide * the median location. * int (*compar) A comparison function for the first list, which takes two pointers * to values in the first list and returns 0,-1 or 1 when the first * value is equal, less than or greater than the second. * char **tmp A list of temporary variables, allocated with the size of the value * in each list * void *pvalue Pointers to temporary variable allocated with the width of the * values of the first list. 
*/ static int partition_pivot(char **lists, size_t nlists, size_t *widths, int left, int right, int pivotIndex, int (*compar)(const void *, const void *), char **tmp, void *pvalue) { int storeIndex = left; memcpy(pvalue,lists[0]+pivotIndex*widths[0],widths[0]); SWAPN(lists,nlists,widths,tmp,pivotIndex,right) // Move pivot to end for (int i=left;i<right;i++) { if (compar(lists[0]+i*widths[0],pvalue) <= 0) { SWAPN(lists,nlists,widths,tmp,i,storeIndex) storeIndex++; } } SWAPN(lists,nlists,widths,tmp,storeIndex,right) // Move pivot to its final place return(storeIndex); }
/*
 * Merge two adjacent runs of the array at `base`: `m` elements beginning at
 * index `start`, immediately followed by `n` elements beginning at index
 * `start + m`.
 *
 * If m + n <= RECURSION_THRESHOLD the work is handed straight to rmerge().
 * Otherwise both runs are cut into whole blocks of `blksize` elements (any
 * leftover elements form a partial block at the very start and at the very
 * end), the whole blocks are merged by one of two methods, and finally the
 * two partial blocks are folded in by ldistribute() and rdistribute().
 *
 * The method is selected from `bufsize`:
 *   - method 6 ("Section 6 standard merge") when bufsize is at least
 *     2*s6blksize + m/s6blksize + n/s6blksize, with s6blksize = squareroot(m+n);
 *   - method 5 ("Section 5 limited-buffer merge") otherwise.
 *
 * Both methods use array elements starting at index 0 as working buffer
 * (see `mergebuf = 0` and the bufsort() calls below).
 *
 * Parameters:
 *   base, size   passed through unchanged to the helper routines
 *                (presumably the array and its element size -- TODO confirm).
 *   compare      comparison callback, passed through to the helpers;
 *                CTXPARAM / CTXARG are macros defined elsewhere.
 *   bufsize      number of buffer elements available.
 *                NOTE(review): method 5 divides by (bufsize - 1), so this
 *                looks like it must be at least 2 -- confirm the caller
 *                guarantees it.
 *   start        index of the first element of the m-run.
 *   m, n         lengths of the two runs.
 *
 * COMPARE, SWAP and SWAPN are macros defined outside this function; from
 * their use here they take element indexes (and, for SWAPN, a count).
 */
static void merge(void *base, size_t size, cmpfn compare CTXPARAM,
                  size_t bufsize, size_t start, size_t m, size_t n) {
    size_t s6blksize = squareroot(m+n);
    size_t s6bufsize = 2*s6blksize + m/s6blksize + n/s6blksize;
    size_t blkstart, blkend, blksize, blocks, mblocks, nblocks, mextra, nextra;
    int method;

    if (m + n <= RECURSION_THRESHOLD) {
        rmerge(base, size, compare CTXARG, start, m, n);
        return;
    }

    /*
     * Decide which merge algorithm we're using, and work out the
     * size of the blocks.
     */
    if (bufsize >= s6bufsize) {
        method = 6;                    /* Section 6 standard merge */
        blksize = s6blksize;
    } else {
        method = 5;                    /* Section 5 limited-buffer merge */
        blksize = (m+n + bufsize - 2) / (bufsize - 1);
    }

    /*
     * We're going to partition our array into blocks of size
     * blksize, by leaving a partial block at the start and one at
     * the end so that the m-blocks and n-blocks abut directly.
     */
    mblocks = m / blksize;
    mextra = m - mblocks * blksize;
    blkstart = start + mextra;
    nblocks = n / blksize;
    nextra = n - nblocks * blksize;
    blocks = mblocks + nblocks;
    blkend = blkstart + blocks * blksize;

    /* The block merge is only needed when both runs contribute whole blocks. */
    if (mblocks && nblocks) {
        if (method == 6) {
            size_t mi, mb, mr, ni, nb, nr, blkindex;
            size_t mergebufin, mergebufout;

            /*
             * Section 6 merge. We need a tracking buffer of size
             * "blocks", and a merge buffer of size 2*blksize.
             */
            size_t mergebuf = 0;       /* start of buffer space */
            size_t trackbuf = mergebuf + 2*blksize;
            assert(trackbuf + blocks <= s6bufsize);

            /*
             * Start by sorting the tracking buffer, since we're
             * going to use it to order the output blocks of the
             * merge.
             */
            bufsort(base, size, compare CTXARG, trackbuf, blocks);

            /*
             * Now simply start reading the two input lists of
             * blocks, and writing merged output into the merge
             * buffer.
             */
            mi = blkstart;             /* index of next element */
            mb = 0;                    /* index of current block */
            mr = blksize;              /* elements remaining in that block */
            ni = start + m;            /* index of next element */
            nb = mblocks;              /* index of current block */
            nr = blksize;              /* elements remaining in that block */
            mergebufin = mergebufout = 0;
            while (mi < start + m || ni < blkend) {
                blkindex = blocks;     /* dummy value: no finished block */

                /*
                 * Decide which list we're taking an item from.
                 * Ties go to the m-list (<= 0).
                 */
                if (ni >= blkend ||
                    (mi < start + m && COMPARE(mi, ni) <= 0)) {
                    /* Take from the m-list. */
                    SWAP(mergebufin, mi);
                    mergebufin = (mergebufin + 1) % (2*blksize);
                    mi++;
                    mr--;
                    if (mr == 0) {
                        blkindex = mb++;
                        mr = blksize;
                    }
                } else {
                    /* Take from the n-list. */
                    SWAP(mergebufin, ni);
                    mergebufin = (mergebufin + 1) % (2*blksize);
                    ni++;
                    nr--;
                    if (nr == 0) {
                        blkindex = nb++;
                        nr = blksize;
                    }
                }

                /*
                 * If we've emptied (i.e. filled with merge buffer
                 * elements) an entire input block on either the
                 * m- or n-side, we now fill it with merge output
                 * data from the merge buffer.
                 */
                if (blkindex < blocks) {
                    size_t smallest, i;

                    SWAPN(mergebufout, blkstart + blksize * blkindex, blksize);
                    mergebufout = (mergebufout + blksize) % (2*blksize);

                    /*
                     * Now we must find the smallest as yet unused
                     * element in the tracking buffer, and swap it
                     * into the place matching this block, so that
                     * we know what order to output the blocks in
                     * when we've finished.
                     *
                     * The candidates are the slot of the block just
                     * finished plus the slots of all blocks not yet
                     * finished on either side.
                     *
                     * NOTE(review): "smallest" starts at blkindex, which
                     * is < blocks here, so the "smallest == blocks" tests
                     * below never appear to fire.
                     */
                    smallest = blkindex;
                    for (i = mb; i < mblocks; i++)
                        if (smallest == blocks ||
                            COMPARE(trackbuf + i, trackbuf + smallest) < 0)
                            smallest = i;
                    for (i = nb; i < blocks; i++)
                        if (smallest == blocks ||
                            COMPARE(trackbuf + i, trackbuf + smallest) < 0)
                            smallest = i;
                    if (smallest != blkindex)
                        SWAP(trackbuf + blkindex, trackbuf + smallest);
                }
            }

            /*
             * Our stably merged output list is now sitting in our
             * block list, except that the blocks are permuted
             * into some arbitrary wrong order, and the tracking
             * buffer knows what order that is. So we now
             * selection-sort the tracking buffer, and swap real
             * blocks in parallel with the swaps done in that
             * sort. (Selection sort is used because it uses the
             * minimum number of swaps, and they're what's
             * expensive here.)
             */
            {
                size_t i, j, smallest;
                for (i = 0; i < blocks; i++) {
                    smallest = i;
                    for (j = i+1; j < blocks; j++) {
                        if (COMPARE(trackbuf + j, trackbuf + smallest) < 0)
                            smallest = j;
                    }
                    if (i != smallest) {
                        SWAP(trackbuf + i, trackbuf + smallest);
                        SWAPN(blkstart + i * blksize,
                              blkstart + smallest * blksize, blksize);
                    }
                }
            }

            /*
             * And that's our main merge complete.
             */
        } else {
            size_t firstn, currpos;
            /* [movedstart, movedend) is the stretch relocated by the last
             * block exchange, and movedseq is the sequence it came from;
             * an empty range (both 0) means nothing is recorded. */
            size_t movedstart = 0, movedend = 0;
            int movedseq = 0;

            /*
             * Sort the buffer.
             */
            bufsort(base, size, compare CTXARG, 0, blocks);

            /*
             * Identify the buffer element corresponding to the
             * first n-block. We will keep this index correct
             * throughout the following sort, so that we can
             * always tell which input sequence a given block
             * belongs to by comparing its corresponding element
             * in the buffer with this one.
             */
            firstn = mblocks;

            /*
             * Selection-sort the blocks by their first element,
             * breaking ties using the buffer. We also mirror
             * block swaps in the buffer, and keep firstn up to
             * date in the process.
             */
            {
                size_t i, j, smallest;
                for (i = 0; i < blocks; i++) {
                    smallest = i;
                    for (j = i+1; j < blocks; j++) {
                        int cmp = COMPARE(blkstart + j * blksize,
                                          blkstart + smallest * blksize);
                        if (!cmp)
                            cmp = COMPARE(j, smallest);
                        if (cmp < 0)
                            smallest = j;
                    }
                    if (i != smallest) {
                        SWAPN(blkstart + i * blksize,
                              blkstart + smallest * blksize, blksize);
                        SWAP(i, smallest);
                        /* If firstn was one of the two swapped slots, it
                         * now lives in the other one. */
                        if (i == firstn || smallest == firstn)
                            firstn = i + smallest - firstn;
                    }
                }
            }

            /*
             * "currpos" will track the next unmerged element from
             * here to the end of the array.
             */
            currpos = blkstart;
            while (currpos < blkend) {
                int seqA, seqB, cmp;
                size_t i, apos = currpos, bpos;

                /*
                 * We're looking at the next unmerged element,
                 * which I'll call A. Find out which original
                 * sequence it's from: usually we do this by
                 * finding the buffer entry corresponding to its
                 * block, although if A is part of a stretch of
                 * the array we moved in a previous iteration then
                 * the buffer may be wrong.
                 */
                if (apos >= movedstart && apos < movedend) {
                    seqA = movedseq;
                    bpos = movedend;
                } else {
                    i = (apos - blkstart) / blksize;
                    seqA = COMPARE(i, firstn) >= 0;/* 0 means m, 1 means n */
                    bpos = blkstart + (i+1) * blksize;
                }

                /*
                 * Search forward to find the next element B from
                 * the _other_ sequence, whichever it is.
                 */
                i = (bpos - blkstart) / blksize;
                seqB = !seqA;
                while (i < blocks && (COMPARE(i, firstn) >= 0) == seqA) {
                    i++;
                    bpos = blkstart + i * blksize;
                }

                /*
                 * If B doesn't exist (we've hit the end of the
                 * list), we've finished!
                 */
                if (bpos == blkend)
                    break;

                /*
                 * Otherwise, see if some merging needs to be
                 * done. If B comes after the element directly
                 * before it (from the other sequence), then we
                 * don't need to move anything just yet.
                 *
                 * (Note that "comes after" must be interpreted
                 * stably, which means we must break ties by
                 * referring to our knowledge of which original
                 * sequences the two elements are from.)
                 */
                cmp = COMPARE(bpos-1, bpos);
                if (cmp == 0)
                    cmp = seqA - seqB; /* break ties correctly */
                if (cmp < 0) {
                    /*
                     * This is the easy case: everything from A to
                     * just before B is already correctly merged,
                     * so we can simply advance currpos.
                     */
                    currpos = bpos;
                    movedstart = movedend = 0;
                } else {
                    size_t bot, mid, top;
                    size_t cpos;

                    /*
                     * And this is the case where we actually have
                     * to do some work (bah): B must be inserted
                     * somewhere between A and where it currently
                     * is. (Up to and including putting it
                     * _before_ A itself.) So we start by
                     * binary-searching for that insertion point.
                     * Again, we must take care to break
                     * comparison ties in a direction dependent on
                     * seqA and seqB.
                     */
                    bot = apos-1;
                    top = bpos;
                    while (top - bot > 1) {
                        mid = (top + bot) / 2;
                        cmp = COMPARE(mid, bpos);
                        if (cmp == 0)
                            cmp = seqA - seqB;
                        if (cmp < 0)
                            bot = mid;
                        else
                            top = mid;
                    }
                    cpos = top;

                    /*
                     * Now "cpos" points at some element C of A's
                     * sequence which comes after element B. (The
                     * above search cannot have terminated with
                     * "top" pointing at B itself, because
                     * otherwise we'd be in the easy case above.)
                     *
                     * We can't just move B to that position yet,
                     * though, because there may be further
                     * elements of _B's_ sequence which come
                     * before C. So now we search forward for
                     * those.
                     */
                    bot = bpos;
                    /* i is still pointing at B's block number; start there. */
                    while (++i < blocks) {
                        /*
                         * See if we can skip an entire block in
                         * our search.
                         */
                        if ((COMPARE(i, firstn) >= 0) != seqB)
                            break;     /* no, this is A's sequence again */
                        /* Check the first element of the new block. */
                        cmp = COMPARE(blkstart + i * blksize, cpos);
                        if (cmp == 0)
                            cmp = seqB - seqA;
                        if (cmp > 0) {
                            break;     /* gone too far */
                        } else {
                            /* yes, we can skip a block */
                            bot = blkstart + i * blksize;
                        }
                    }
                    /* Now we can binary-search one block only. */
                    top = bot - (bot-blkstart) % blksize + blksize;
                    while (top - bot > 1) {
                        mid = (top + bot) / 2;
                        cmp = COMPARE(mid, cpos);
                        if (cmp == 0)
                            cmp = seqB - seqA;
                        if (cmp < 0)
                            bot = mid;
                        else
                            top = mid;
                    }

                    /*
                     * Now we're ready. We have a chunk of array
                     * looking like
                     *
                     *  apos   cpos   bpos       top
                     *   +------+------+----------+
                     *   |  P   |  Q   |    R     |
                     *   +------+------+----------+
                     *
                     * and we know that everything up to cpos is
                     * correctly positioned, and that everything
                     * in stretch R must come before element C (at
                     * the start of stretch Q). So we can
                     * block-exchange Q with R, and update currpos
                     * to point at where the end of R ended up.
                     */
                    block_exchange(base, size, cpos, bpos - cpos, top - bpos);
                    currpos = cpos + (top - bpos);

                    /*
                     * And record the fact that we've moved
                     * stretch Q, so we know which sequence it
                     * belongs to better than the buffer does.
                     */
                    movedstart = currpos;
                    movedend = top;
                    movedseq = seqA;
                }
            }
        }
    }

#ifdef TESTMODE
    /*
     * Our main block sequence should now be correctly merged.
     */
    subseq_should_be_sorted(blkstart, blkend - blkstart);
#endif

    /*
     * Now we need to stably distribute the partial blocks from
     * each end into the main sorted sequence, and we're done.
     */
    ldistribute(base, size, compare CTXARG, start, blkend-start, mextra);
    rdistribute(base, size, compare CTXARG, start, m+n, nextra);
}