/*
 * Swap half-rows of 2^n * (2*2^n) matrix.
 * FORWARD_CYCLE: even/odd permutation of the halfrows.
 * BACKWARD_CYCLE: reverse the even/odd permutation.
 */
static int
swap_halfrows_pow2(mpd_uint_t *matrix, mpd_size_t rows, mpd_size_t cols, int dir)
{
    mpd_uint_t buf1[BUFSIZE];
    mpd_uint_t buf2[BUFSIZE];
    mpd_uint_t *readbuf, *writebuf, *hp;
    mpd_size_t *done, dbits;
    mpd_size_t b = BUFSIZE, stride;
    mpd_size_t hn, hmax; /* halfrow number */
    mpd_size_t m, r=0;
    mpd_size_t offset;
    mpd_size_t next;

    assert(cols == mul_size_t(2, rows));

    if (dir == FORWARD_CYCLE) {
        r = rows;
    }
    else if (dir == BACKWARD_CYCLE) {
        r = 2;
    }
    else {
        abort(); /* GCOV_NOT_REACHED */
    }

    m = cols - 1;
    hmax = rows; /* cycles start at odd halfrows */
    dbits = 8 * sizeof *done;
    if ((done = mpd_calloc(hmax/(sizeof *done) + 1, sizeof *done)) == NULL) {
        return 0;
    }

    for (hn = 1; hn <= hmax; hn += 2) {

        if (done[hn/dbits] & mpd_bits[hn%dbits]) {
            continue;
        }

        readbuf = buf1; writebuf = buf2;

        for (offset = 0; offset < cols/2; offset += b) {

            stride = (offset + b < cols/2) ? b : cols/2-offset;

            hp = matrix + hn*cols/2;
            memcpy(readbuf, hp+offset, stride*(sizeof *readbuf));
            pointerswap(&readbuf, &writebuf);

            next = mulmod_size_t(hn, r, m);
            hp = matrix + next*cols/2;

            while (next != hn) {

                memcpy(readbuf, hp+offset, stride*(sizeof *readbuf));
                memcpy(hp+offset, writebuf, stride*(sizeof *writebuf));
                pointerswap(&readbuf, &writebuf);

                done[next/dbits] |= mpd_bits[next%dbits];

                next = mulmod_size_t(next, r, m);
                hp = matrix + next*cols/2;
            }

            memcpy(hp+offset, writebuf, stride*(sizeof *writebuf));

            done[hn/dbits] |= mpd_bits[hn%dbits];
        }
    }

    mpd_free(done);
    return 1;
}
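/*
 * Illustrative sketch, not part of libmpdec: the function above permutes
 * half-rows by following cycles of the map h -> (h * r) mod (cols-1),
 * with r = rows for FORWARD_CYCLE and r = 2 for BACKWARD_CYCLE.  The
 * standalone program below (names perm/rows/cols are local to the sketch;
 * plain size_t and % stand in for mpd_size_t and mulmod_size_t) checks
 * that the backward factor undoes the forward factor for rows = 4,
 * cols = 8, i.e. that the two permutations are inverses modulo cols-1.
 */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

static size_t
perm(size_t h, size_t r, size_t m)
{
    return (h * r) % m;  /* the nonzero residues 1..m-1 split into cycles */
}

int
main(void)
{
    const size_t rows = 4, cols = 8;  /* cols == 2*rows, both powers of two */
    const size_t m = cols - 1;
    size_t h;

    for (h = 1; h < m; h++) {
        size_t fwd = perm(h, rows, m);  /* FORWARD_CYCLE step */
        size_t back = perm(fwd, 2, m);  /* BACKWARD_CYCLE step */
        assert(back == h);              /* 2*rows == cols == 1 (mod cols-1) */
        printf("h=%zu -> %zu -> %zu\n", h, fwd, back);
    }
    return 0;
}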
/*
 * Knuth, TAOCP Volume 2, 4.3.1:
 *     q, r := quotient and remainder of uconst (len nplusm)
 *             divided by vconst (len n)
 *     nplusm >= n
 *
 * If r is not NULL, r will contain the remainder. If r is NULL, the
 * return value indicates if there is a remainder: 1 for true, 0 for
 * false. A return value of -1 indicates an error.
 */
int
_mpd_basedivmod(mpd_uint_t *q, mpd_uint_t *r,
                const mpd_uint_t *uconst, const mpd_uint_t *vconst,
                mpd_size_t nplusm, mpd_size_t n)
{
    mpd_uint_t ustatic[MPD_MINALLOC_MAX];
    mpd_uint_t vstatic[MPD_MINALLOC_MAX];
    mpd_uint_t *u = ustatic;
    mpd_uint_t *v = vstatic;
    mpd_uint_t d, qhat, rhat, w2[2];
    mpd_uint_t hi, lo, x;
    mpd_uint_t carry;
    mpd_size_t i, j, m;
    int retval = 0;

    assert(n > 1 && nplusm >= n);

    m = sub_size_t(nplusm, n);

    /* D1: normalize */
    d = MPD_RADIX / (vconst[n-1] + 1);

    if (nplusm >= MPD_MINALLOC_MAX) {
        if ((u = mpd_calloc(nplusm+1, sizeof *u)) == NULL) {
            return -1;
        }
    }
    if (n >= MPD_MINALLOC_MAX) {
        if ((v = mpd_calloc(n+1, sizeof *v)) == NULL) {
            mpd_free(u);
            return -1;
        }
    }

    _mpd_shortmul(u, uconst, nplusm, d);
    _mpd_shortmul(v, vconst, n, d);

    /* D2: loop */
    rhat = 0;
    for (j=m; j != MPD_SIZE_MAX; j--) {

        /* D3: calculate qhat and rhat */
        rhat = _mpd_shortdiv(w2, u+j+n-1, 2, v[n-1]);
        qhat = w2[1] * MPD_RADIX + w2[0];

        while (1) {
            if (qhat < MPD_RADIX) {
                _mpd_singlemul(w2, qhat, v[n-2]);
                if (w2[1] <= rhat) {
                    if (w2[1] != rhat || w2[0] <= u[j+n-2]) {
                        break;
                    }
                }
            }
            qhat -= 1;
            rhat += v[n-1];
            if (rhat < v[n-1] || rhat >= MPD_RADIX) {
                break;
            }
        }

        /* D4: multiply and subtract */
        carry = 0;
        for (i=0; i <= n; i++) {
            _mpd_mul_words(&hi, &lo, qhat, v[i]);

            lo = carry + lo;
            if (lo < carry) hi++;

            _mpd_div_words_r(&hi, &lo, hi, lo);

            x = u[i+j] - lo;
            carry = (u[i+j] < x);
            u[i+j] = carry ? x+MPD_RADIX : x;
            carry += hi;
        }
        q[j] = qhat;

        /* D5: test remainder */
        if (carry) {
            q[j] -= 1;
            /* D6: add back */
            (void)_mpd_baseadd(u+j, u+j, v, n+1, n);
        }
    }

    /* D8: unnormalize */
    if (r != NULL) {
        _mpd_shortdiv(r, u, n, d);
        /* we are not interested in the return value here */
        retval = 0;
    }
    else {
        retval = !_mpd_isallzero(u, n);
    }

    if (u != ustatic) mpd_free(u);
    if (v != vstatic) mpd_free(v);
    return retval;
}
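/*
 * Illustrative sketch, not part of libmpdec: step D1 above scales u and v
 * by d = MPD_RADIX / (v[n-1] + 1) so that the leading digit of the scaled
 * divisor is at least MPD_RADIX/2.  Knuth (TAOCP Vol. 2, 4.3.1) uses this
 * normalization to bound the qhat corrections in step D3 to at most two.
 * The standalone program below checks the property in base 10, where it
 * can be verified by hand; the names radix/vtop/d are local to the sketch.
 */
#include <assert.h>
#include <stdio.h>

int
main(void)
{
    const unsigned radix = 10;
    unsigned vtop;

    for (vtop = 1; vtop < radix; vtop++) {
        unsigned d = radix / (vtop + 1);
        /* d*vtop is a lower bound for the leading digit of the scaled
           divisor d*v; normalization puts it into [radix/2, radix). */
        unsigned scaled_top = d * vtop;
        assert(scaled_top >= radix / 2 && scaled_top < radix);
        printf("vtop=%u  d=%u  d*vtop=%u\n", vtop, d, scaled_top);
    }
    return 0;
}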