Пример #1
0
/*
 * Implements the partition selection algorithm with randomized selection
 *
 * From: http://en.wikipedia.org/wiki/Selection_algorithm#Linear_general_selection_algorithm_-_.22Median_of_Medians_algorithm.22
 *
 * Arguments:
 * 	char **lists:	A list of lists, the first of which contains the values used for pivots
 * 			the 2nd and further lists will be pivoted alongside the first.
 * 			A common usage would be to have the first list point to an array
 * 			of values, then the second would point to another char ** list of
 * 			strings.  The second list would have it's pointer values moved
 * 			around as part of the pivots, and the index location where the
 * 			partition value (say for the median) occurs would allow a reference
 * 			to the associated strings in the second list.
 * 	size_t nlists	the number of lists
 * 	size_t *widths	An array of widths, one for each list
 * 	int left,right	The left and right boundary of the list to be pivoted
 * 	int pivotIndex	The index around which to pivot the list.  A common use-case is
 * 			to choose pivotIndex = listLength/2, then the pivot will provide
 * 			the median location.
 * 	int (*compar)	A comparison function for the first list, which takes two pointers
 * 			to values in the first list and returns 0,-1 or 1 when the first
 * 			value is equal, less than or greater than the second.
 * 	char **tmp 	A list of temporary variables, allocated with the size of the value
 * 			in each list
 * 	void *pvalue	Pointers to temporary variable allocated with the width of the
 * 			values of the first list.
 */
static int
partition_pivot(char **lists, size_t nlists, size_t *widths,
		int left, int right, int pivotIndex,
		int (*compar)(const void *, const void *),
		char **tmp, void *pvalue)
{
	int storeIndex = left;

	memcpy(pvalue,lists[0]+pivotIndex*widths[0],widths[0]);

	SWAPN(lists,nlists,widths,tmp,pivotIndex,right) // Move pivot to end
	for (int i=left;i<right;i++)
	{
		if (compar(lists[0]+i*widths[0],pvalue) <= 0)
		{
			SWAPN(lists,nlists,widths,tmp,i,storeIndex)
			storeIndex++;
		}
	}
	SWAPN(lists,nlists,widths,tmp,storeIndex,right) // Move pivot to its final place
	return(storeIndex);
}
Пример #2
0
/*
 * Merge two adjacent runs of the array at `base` in place: a run of m
 * elements beginning at index `start`, immediately followed by a run of
 * n elements beginning at index start + m. (Presumably both runs are
 * already sorted; the comments below describe the result as a stable
 * merge.)
 *
 * Outline:
 *  - If m + n <= RECURSION_THRESHOLD the whole job is delegated to
 *    rmerge().
 *  - Otherwise one of two block-based strategies is chosen, depending
 *    on whether `bufsize` is large enough for the "Section 6" merge;
 *    if not, the "Section 5" limited-buffer merge is used.
 *  - Both strategies operate only on whole blocks of blksize elements.
 *    The leftover partial block at the start of the m-run (mextra
 *    elements) and at the end of the n-run (nextra elements) are put
 *    in place afterwards by ldistribute() and rdistribute().
 *
 * Parameters:
 *   base, size   - passed through to the helpers (presumably the array
 *                  base pointer and the element size in bytes; also
 *                  presumably used implicitly by the COMPARE / SWAP /
 *                  SWAPN macros, which are defined elsewhere and are
 *                  called here with element indices).
 *   compare      - comparison function, followed by whatever extra
 *                  parameter CTXPARAM expands to (defined elsewhere).
 *   bufsize      - number of buffer elements available. The code
 *                  addresses the buffer at element indices starting
 *                  from 0 (see mergebuf and the bufsort() calls).
 *   start, m, n  - position and lengths of the two runs, as above.
 */
static void merge(void *base, size_t size, cmpfn compare CTXPARAM,
		  size_t bufsize, size_t start, size_t m, size_t n)
{
    /*
     * Block size and total buffer requirement for the Section 6
     * merge: 2*blksize elements of merge buffer plus one tracking
     * element per whole block.
     *
     * NOTE(review): these divisions are evaluated before the
     * small-input early return below. If squareroot() can return 0
     * (e.g. for m + n == 0) this divides by zero -- confirm callers
     * never request an empty merge.
     */
    size_t s6blksize = squareroot(m+n);
    size_t s6bufsize = 2*s6blksize + m/s6blksize + n/s6blksize;
    size_t blkstart, blkend, blksize, blocks, mblocks, nblocks, mextra, nextra;
    int method;

    /* Small inputs are handled entirely by rmerge(). */
    if (m + n <= RECURSION_THRESHOLD) {
	rmerge(base, size, compare CTXARG, start, m, n);
	return;
    }

    /*
     * Decide which merge algorithm we're using, and work out the
     * size of the blocks.
     */
    if (bufsize >= s6bufsize) {
	method = 6;		       /* Section 6 standard merge */
	blksize = s6blksize;
    } else {
	method = 5;		       /* Section 5 limited-buffer merge */
	/*
	 * Ceiling of (m+n) / (bufsize-1), so that the number of
	 * whole blocks, m/blksize + n/blksize, cannot exceed
	 * bufsize-1.
	 *
	 * NOTE(review): divides by bufsize - 1; presumably callers
	 * guarantee bufsize >= 2 -- confirm.
	 */
	blksize = (m+n + bufsize - 2) / (bufsize - 1);
    }

    /*
     * We're going to partition our array into blocks of size
     * blksize, by leaving a partial block at the start and one at
     * the end so that the m-blocks and n-blocks abut directly.
     *
     * Resulting layout (element indices):
     *   [start, blkstart)      mextra leftover m-elements
     *   [blkstart, start+m)    mblocks whole m-blocks
     *   [start+m, blkend)      nblocks whole n-blocks
     *   [blkend, start+m+n)    nextra leftover n-elements
     */
    mblocks = m / blksize;
    mextra = m - mblocks * blksize;
    blkstart = start + mextra;
    nblocks = n / blksize;
    nextra = n - nblocks * blksize;
    blocks = mblocks + nblocks;
    blkend = blkstart + blocks * blksize;

    /*
     * The block merge is only needed when both runs contribute at
     * least one whole block; otherwise everything is left to the
     * two distribute calls at the end.
     */
    if (mblocks && nblocks) {
	if (method == 6) {
	    size_t mi, mb, mr, ni, nb, nr, blkindex;
	    size_t mergebufin, mergebufout;

	    /*
	     * Section 6 merge. We need a tracking buffer of size
	     * "blocks", and a merge buffer of size 2*blksize.
	     */
	    size_t mergebuf = 0;       /* start of buffer space */
	    size_t trackbuf = mergebuf + 2*blksize;
	    assert(trackbuf + blocks <= s6bufsize);

	    /*
	     * Start by sorting the tracking buffer, since we're
	     * going to use it to order the output blocks of the
	     * merge.
	     */
	    bufsort(base, size, compare CTXARG, trackbuf, blocks);

	    /*
	     * Now simply start reading the two input lists of
	     * blocks, and writing merged output into the merge
	     * buffer.
	     *
	     * mergebufin / mergebufout are the write and read
	     * positions in the merge buffer, which is used
	     * circularly (both wrap modulo 2*blksize).
	     */
	    mi = blkstart;	       /* index of next element */
	    mb = 0;		       /* index of current block */
	    mr = blksize;	       /* elements remaining in that block */
	    ni = start + m;	       /* index of next element */
	    nb = mblocks;	       /* index of current block */
	    nr = blksize;	       /* elements remaining in that block */
	    mergebufin = mergebufout = 0;
	    while (mi < start + m || ni < blkend) {
		blkindex = blocks;     /* dummy value: no finished block */

		/*
		 * Decide which list we're taking an item from.
		 * Ties (COMPARE == 0) go to the m-list, and the
		 * m-list is also used once the n-list has run
		 * out.
		 */
		if (ni >= blkend ||
		    (mi < start + m && COMPARE(mi, ni) <= 0)) {
		    /* Take from the m-list. */
		    SWAP(mergebufin, mi);
		    mergebufin = (mergebufin + 1) % (2*blksize);
		    mi++;
		    mr--;
		    if (mr == 0) {
			/* That m-block is now fully consumed. */
			blkindex = mb++;
			mr = blksize;
		    }
		} else {
		    /* Take from the n-list. */
		    SWAP(mergebufin, ni);
		    mergebufin = (mergebufin + 1) % (2*blksize);
		    ni++;
		    nr--;
		    if (nr == 0) {
			/* That n-block is now fully consumed. */
			blkindex = nb++;
			nr = blksize;
		    }
		}

		/*
		 * If we've emptied (i.e. filled with merge buffer
		 * elements) an entire input block on either the
		 * m- or n-side, we now fill it with merge output
		 * data from the merge buffer.
		 */
		if (blkindex < blocks) {
		    size_t smallest, i;

		    SWAPN(mergebufout, blkstart + blksize * blkindex, blksize);
		    mergebufout = (mergebufout + blksize) % (2*blksize);

		    /*
		     * Now we must find the smallest as yet unused
		     * element in the tracking buffer, and swap it
		     * into the place matching this block, so that
		     * we know what order to output the blocks in
		     * when we've finished.
		     *
		     * The candidates are this block's own tracking
		     * element plus those of the blocks not yet
		     * consumed: mb..mblocks-1 on the m-side and
		     * nb..blocks-1 on the n-side.
		     *
		     * (smallest starts out as blkindex, which is
		     * less than blocks here, so the
		     * "smallest == blocks" tests below never
		     * fire.)
		     */
		    smallest = blkindex;
		    for (i = mb; i < mblocks; i++)
			if (smallest == blocks ||
			    COMPARE(trackbuf + i, trackbuf + smallest) < 0)
			    smallest = i;
		    for (i = nb; i < blocks; i++)
			if (smallest == blocks ||
			    COMPARE(trackbuf + i, trackbuf + smallest) < 0)
			    smallest = i;
		    if (smallest != blkindex)
			SWAP(trackbuf + blkindex, trackbuf + smallest);
		}
	    }

	    /*
	     * Our stably merged output list is now sitting in our
	     * block list, except that the blocks are permuted
	     * into some arbitrary wrong order, and the tracking
	     * buffer knows what order that is. So we now
	     * selection-sort the tracking buffer, and swap real
	     * blocks in parallel with the swaps done in that
	     * sort. (Selection sort is used because it uses the
	     * minimum number of swaps, and they're what's
	     * expensive here.)
	     */
	    {
		size_t i, j, smallest;
		for (i = 0; i < blocks; i++) {
		    smallest = i;
		    for (j = i+1; j < blocks; j++) {
			if (COMPARE(trackbuf + j, trackbuf + smallest) < 0)
			    smallest = j;
		    }
		    if (i != smallest) {
			SWAP(trackbuf + i, trackbuf + smallest);
			SWAPN(blkstart + i * blksize,
			      blkstart + smallest * blksize, blksize);
		    }
		}
	    }

	    /*
	     * And that's our main merge complete.
	     */
	} else {
	    /*
	     * Section 5 limited-buffer merge. Buffer elements
	     * 0..blocks-1 are used, one per block.
	     */
	    size_t firstn, currpos;
	    /*
	     * [movedstart, movedend) is the stretch of the array
	     * moved by the most recent block exchange, and
	     * movedseq is the sequence (0 = m, 1 = n) that stretch
	     * belongs to. An empty range means nothing is
	     * recorded.
	     */
	    size_t movedstart = 0, movedend = 0;
	    int movedseq = 0;

	    /*
	     * Sort the buffer.
	     */
	    bufsort(base, size, compare CTXARG, 0, blocks);

	    /*
	     * Identify the buffer element corresponding to the
	     * first n-block. We will keep this index correct
	     * throughout the following sort, so that we can
	     * always tell which input sequence a given block
	     * belongs to by comparing its corresponding element
	     * in the buffer with this one.
	     */
	    firstn = mblocks;

	    /*
	     * Selection-sort the blocks by their first element,
	     * breaking ties using the buffer. We also mirror
	     * block swaps in the buffer, and keep firstn up to
	     * date in the process.
	     */
	    {
		size_t i, j, smallest;
		for (i = 0; i < blocks; i++) {
		    smallest = i;
		    for (j = i+1; j < blocks; j++) {
			int cmp = COMPARE(blkstart + j * blksize,
					  blkstart + smallest * blksize);
			if (!cmp)
			    cmp = COMPARE(j, smallest);
			if (cmp < 0)
			    smallest = j;
		    }
		    if (i != smallest) {
			SWAPN(blkstart + i * blksize,
			      blkstart + smallest * blksize, blksize);
			SWAP(i, smallest);
			/*
			 * If firstn was one of the two swapped
			 * buffer slots, it now lives in the
			 * other one.
			 */
			if (i == firstn || smallest == firstn)
			    firstn = i + smallest - firstn;
		    }
		}
	    }

	    /*
	     * "currpos" will track the next unmerged element from
	     * here to the end of the array.
	     */
	    currpos = blkstart;

	    while (currpos < blkend) {
		int seqA, seqB, cmp;
		size_t i, apos = currpos, bpos;

		/*
		 * We're looking at the next unmerged element,
		 * which I'll call A. Find out which original
		 * sequence it's from: usually we do this by
		 * finding the buffer entry corresponding to its
		 * block, although if A is part of a stretch of
		 * the array we moved in a previous iteration then
		 * the buffer may be wrong.
		 */
		if (apos >= movedstart && apos < movedend) {
		    seqA = movedseq;
		    bpos = movedend;
		} else {
		    i = (apos - blkstart) / blksize;
		    seqA = COMPARE(i, firstn) >= 0;/* 0 means m, 1 means n */
		    bpos = blkstart + (i+1) * blksize;
		}

		/*
		 * Search forward to find the next element B from
		 * the _other_ sequence, whichever it is.
		 */
		i = (bpos - blkstart) / blksize;
		seqB = !seqA;
		while (i < blocks && (COMPARE(i, firstn) >= 0) == seqA) {
		    i++;
		    bpos = blkstart + i * blksize;
		}

		/*
		 * If B doesn't exist (we've hit the end of the
		 * list), we've finished!
		 */
		if (bpos == blkend)
		    break;

		/*
		 * Otherwise, see if some merging needs to be
		 * done. If B comes after the element directly
		 * before it (from the other sequence), then we
		 * don't need to move anything just yet.
		 *
		 * (Note that "comes after" must be interpreted
		 * stably, which means we must break ties by
		 * referring to our knowledge of which original
		 * sequences the two elements are from.)
		 */
		cmp = COMPARE(bpos-1, bpos);
		if (cmp == 0)
		    cmp = seqA - seqB; /* break ties correctly */

		if (cmp < 0) {
		    /*
		     * This is the easy case: everything from A to
		     * just before B is already correctly merged,
		     * so we can simply advance currpos.
		     */
		    currpos = bpos;
		    /* Forget any previously recorded moved stretch. */
		    movedstart = movedend = 0;
		} else {
		    size_t bot, mid, top;
		    size_t cpos;

		    /*
		     * And this is the case where we actually have
		     * to do some work (bah): B must be inserted
		     * somewhere between A and where it currently
		     * is. (Up to and including putting it
		     * _before_ A itself.) So we start by
		     * binary-searching for that insertion point.
		     * Again, we must take care to break
		     * comparison ties in a direction dependent on
		     * seqA and seqB.
		     *
		     * "bot" is an exclusive lower bound, which is
		     * why it starts one position before A: that
		     * lets the search end with top == apos.
		     *
		     * NOTE(review): apos-1 is unsigned arithmetic;
		     * presumably apos > 0 always holds here
		     * because the buffer occupies the low indices
		     * -- confirm.
		     */
		    bot = apos-1;
		    top = bpos;
		    while (top - bot > 1) {
			mid = (top + bot) / 2;
			cmp = COMPARE(mid, bpos);
			if (cmp == 0)
			    cmp = seqA - seqB;
			if (cmp < 0)
			    bot = mid;
			else
			    top = mid;
		    }
		    cpos = top;

		    /*
		     * Now "cpos" points at some element C of A's
		     * sequence which comes after element B. (The
		     * above search cannot have terminated with
		     * "top" pointing at B itself, because
		     * otherwise we'd be in the easy case above.)
		     *
		     * We can't just move B to that position yet,
		     * though, because there may be further
		     * elements of _B's_ sequence which come
		     * before C. So now we search forward for
		     * those.
		     */
		    bot = bpos;
		    /* i is still pointing at B's block number; start there. */
		    while (++i < blocks) {
			/*
			 * See if we can skip an entire block in
			 * our search.
			 */
			if ((COMPARE(i, firstn) >= 0) != seqB)
			    break;     /* no, this is A's sequence again */
			/* Check the first element of the new block. */
			cmp = COMPARE(blkstart + i * blksize, cpos);
			if (cmp == 0)
			    cmp = seqB - seqA;
			if (cmp > 0) {
			    break;     /* gone too far */
			} else {
			    /* yes, we can skip a block */
			    bot = blkstart + i * blksize;
			}
		    }
		    /*
		     * Now we can binary-search one block only.
		     * "top" starts at the end of the block that
		     * contains "bot".
		     */
		    top = bot - (bot-blkstart) % blksize + blksize;
		    while (top - bot > 1) {
			mid = (top + bot) / 2;
			cmp = COMPARE(mid, cpos);
			if (cmp == 0)
			    cmp = seqB - seqA;
			if (cmp < 0)
			    bot = mid;
			else
			    top = mid;
		    }

		    /*
		     * Now we're ready. We have a chunk of array
		     * looking like
		     *
		     * apos   cpos   bpos       top
		     *  +------+------+----------+
		     *  |  P   |  Q   |    R     |
		     *  +------+------+----------+
		     *
		     * and we know that everything up to cpos is
		     * correctly positioned, and that everything
		     * in stretch R must come before element C (at
		     * the start of stretch Q). So we can
		     * block-exchange Q with R, and update currpos
		     * to point at where the end of R ended up.
		     */
		    block_exchange(base, size, cpos, bpos - cpos, top - bpos);
		    currpos = cpos + (top - bpos);

		    /*
		     * And record the fact that we've moved
		     * stretch Q, so we know which sequence it
		     * belongs to better than the buffer does.
		     */
		    movedstart = currpos;
		    movedend = top;
		    movedseq = seqA;
		}
	    }
	}
    }

#ifdef TESTMODE
    /*
     * Our main block sequence should now be correctly merged.
     */
    subseq_should_be_sorted(blkstart, blkend - blkstart);
#endif

    /*
     * Now we need to stably distribute the partial blocks from
     * each end into the main sorted sequence, and we're done.
     * The left call covers [start, blkend) with the mextra
     * leftover m-elements; the right call then covers the whole
     * [start, start+m+n) range with the nextra leftover
     * n-elements.
     */
    ldistribute(base, size, compare CTXARG, start, blkend-start, mextra);
    rdistribute(base, size, compare CTXARG, start, m+n, nextra);
}