Example #1
0
/*
 * Permute the half-rows of a 2^n * (2*2^n) matrix in place.
 *
 * A half-row is a run of cols/2 consecutive words; half-row h starts at
 * matrix + h*cols/2.  The permutation is carried out cycle by cycle: the
 * contents of half-row h are moved to half-row mulmod_size_t(h, r, cols-1),
 * where the multiplier r is selected by dir:
 *
 *     FORWARD_CYCLE:  r = rows  (even/odd permutation of the half-rows)
 *     BACKWARD_CYCLE: r = 2     (reverses the even/odd permutation)
 *
 * Any other value of dir ends in abort().
 *
 * Half-rows are moved in slices of at most BUFSIZE words through two local
 * buffers; the only allocation made here is the bitmap that records which
 * half-rows have already been placed.
 *
 * Returns 1 on success, 0 if the bitmap cannot be allocated.
 */
static int
swap_halfrows_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols, int dir)
{
    mpd_uint_t chunk_a[BUFSIZE];
    mpd_uint_t chunk_b[BUFSIZE];
    mpd_uint_t *incoming, *outgoing; /* roles alternate via pointerswap() */
    mpd_uint_t *slot;
    mpd_size_t *visited;             /* one flag bit per half-row number */
    mpd_size_t wordbits;
    mpd_size_t chunk = BUFSIZE;
    mpd_size_t halfcols;
    mpd_size_t modulus;
    mpd_size_t mult;
    mpd_size_t start, cur;           /* half-row numbers */
    mpd_size_t pos, len, nbytes;


    assert(cols == mul_size_t(2, rows));

    if (dir != FORWARD_CYCLE && dir != BACKWARD_CYCLE) {
        abort(); /* GCOV_NOT_REACHED */
    }
    mult = (dir == FORWARD_CYCLE) ? rows : 2;

    halfcols = cols / 2;
    modulus = cols - 1;
    wordbits = 8 * sizeof *visited;

    visited = mpd_calloc(rows/(sizeof *visited) + 1, sizeof *visited);
    if (visited == NULL) {
        return 0;
    }

    /* Cycles start at odd halfrows. */
    for (start = 1; start <= rows; start += 2) {

        if (visited[start/wordbits] & mpd_bits[start%wordbits]) {
            /* Already placed while following an earlier cycle. */
            continue;
        }

        incoming = chunk_a;
        outgoing = chunk_b;

        /* Follow the complete cycle once for every column slice. */
        for (pos = 0; pos < halfcols; pos += chunk) {

            if (pos + chunk < halfcols) {
                len = chunk;
            }
            else {
                len = halfcols - pos; /* last, possibly shorter slice */
            }
            nbytes = len * (sizeof *incoming);

            /* Pick up the slice of the starting half-row. */
            slot = matrix + start*cols/2 + pos;
            memcpy(incoming, slot, nbytes);
            pointerswap(&incoming, &outgoing);

            /*
             * At each stop, save the slice that lives there, drop the
             * slice that is being carried, then carry the saved one on.
             */
            for (cur = mulmod_size_t(start, mult, modulus);
                 cur != start;
                 cur = mulmod_size_t(cur, mult, modulus)) {

                slot = matrix + cur*cols/2 + pos;
                memcpy(incoming, slot, nbytes);
                memcpy(slot, outgoing, nbytes);
                pointerswap(&incoming, &outgoing);

                visited[cur/wordbits] |= mpd_bits[cur%wordbits];
            }

            /* Back at the start: store the last carried slice there. */
            slot = matrix + start*cols/2 + pos;
            memcpy(slot, outgoing, nbytes);

            visited[start/wordbits] |= mpd_bits[start%wordbits];
        }
    }

    mpd_free(visited);
    return 1;
}
Example #2
0
/*
 * Long division after Knuth, TAOCP Volume 2, 4.3.1 (steps D1 to D8):
 *
 *     q, r := quotient and remainder of uconst (len nplusm)
 *             divided by vconst (len n)
 *
 * Asserted preconditions: n > 1 and nplusm >= n.
 *
 * On success q[0] .. q[nplusm-n] hold the quotient.  If r is not NULL, r
 * will contain the remainder and the return value is 0.  If r is NULL, the
 * return value indicates if there is a remainder: 1 for true, 0 for false.
 * A return value of -1 indicates that working storage could not be
 * allocated.
 *
 * Operands shorter than MPD_MINALLOC_MAX words are normalized into local
 * arrays; longer ones use buffers obtained from mpd_calloc().
 */
int
_mpd_basedivmod(mpd_uint_t *q, mpd_uint_t *r,
                const mpd_uint_t *uconst, const mpd_uint_t *vconst,
                mpd_size_t nplusm, mpd_size_t n)
{
	mpd_uint_t ubuf[MPD_MINALLOC_MAX];
	mpd_uint_t vbuf[MPD_MINALLOC_MAX];
	mpd_uint_t *un = ubuf; /* normalized working copy of the dividend */
	mpd_uint_t *vn = vbuf; /* normalized working copy of the divisor */
	mpd_uint_t scale;
	mpd_uint_t qest, rest;
	mpd_uint_t quot2[2], prod2[2];
	mpd_uint_t hi, lo, diff;
	mpd_uint_t borrow;
	mpd_size_t i, j, m;
	int retval;

	assert(n > 1 && nplusm >= n);
	m = sub_size_t(nplusm, n);

	/* D1: normalize */
	scale = MPD_RADIX / (vconst[n-1] + 1);

	if (nplusm >= MPD_MINALLOC_MAX) {
		un = mpd_calloc(nplusm+1, sizeof *un);
		if (un == NULL) {
			return -1;
		}
	}
	if (n >= MPD_MINALLOC_MAX) {
		vn = mpd_calloc(n+1, sizeof *vn);
		if (vn == NULL) {
			/* nplusm >= n, so un was allocated above as well */
			mpd_free(un);
			return -1;
		}
	}

	_mpd_shortmul(un, uconst, nplusm, scale);
	_mpd_shortmul(vn, vconst, n, scale);

	/* D2: loop over the quotient words, most significant first */
	for (j = m; j != MPD_SIZE_MAX; j--) {

		/* D3: calculate the estimates for quotient word and remainder */
		rest = _mpd_shortdiv(quot2, un+j+n-1, 2, vn[n-1]);
		qest = quot2[1] * MPD_RADIX + quot2[0];

		for (;;) {
			if (qest < MPD_RADIX) {
				/*
				 * Accept the estimate once qest * vn[n-2] does not
				 * exceed the two-word value (rest, un[j+n-2]).
				 */
				_mpd_singlemul(prod2, qest, vn[n-2]);
				if (prod2[1] < rest ||
				    (prod2[1] == rest && prod2[0] <= un[j+n-2])) {
					break;
				}
			}
			qest -= 1;
			rest += vn[n-1];
			/* stop on wraparound or when rest needs a second word */
			if (rest < vn[n-1] || rest >= MPD_RADIX) {
				break;
			}
		}

		/* D4: multiply and subtract */
		borrow = 0;
		for (i = 0; i <= n; i++) {

			_mpd_mul_words(&hi, &lo, qest, vn[i]);

			lo += borrow;
			if (lo < borrow) {
				hi++;
			}

			_mpd_div_words_r(&hi, &lo, hi, lo);

			diff = un[i+j] - lo;
			if (un[i+j] < diff) {
				/* wrapped below zero: fix up and borrow one more */
				un[i+j] = diff + MPD_RADIX;
				borrow = hi + 1;
			}
			else {
				un[i+j] = diff;
				borrow = hi;
			}
		}

		/* D5: test remainder */
		if (borrow) {
			q[j] = qest - 1;
			/* D6: add back */
			(void)_mpd_baseadd(un+j, un+j, vn, n+1, n);
		}
		else {
			q[j] = qest;
		}
	}

	/* D8: unnormalize */
	if (r == NULL) {
		retval = !_mpd_isallzero(un, n);
	}
	else {
		/* we are not interested in the return value here */
		(void)_mpd_shortdiv(r, un, n, scale);
		retval = 0;
	}

	if (un != ubuf) {
		mpd_free(un);
	}
	if (vn != vbuf) {
		mpd_free(vn);
	}
	return retval;
}