/*
 * Multiply the polynomials px and py, reducing exponents modulo r (i.e.
 * working modulo x^r - 1) and coefficients modulo mod.  The product is
 * accumulated in res and then copied back into px.
 *
 *   px   multiplicand, r coefficients; overwritten with the product
 *   py   multiplier, r coefficients; only read
 *   res  scratch/result buffer of r coefficients; must not overlap px
 *        (the final memcpy requires non-overlapping buffers)
 *   r    number of coefficients in each array; r == 0 is a no-op
 *   mod  coefficient modulus; must be >= 1
 *
 * The overflow bounds below assume every input coefficient is already
 * reduced, i.e. strictly less than mod.
 */
static void poly_mod_mul(UV* px, UV* py, UV* res, UV r, UV mod)
{
  UV degpx, degpy;
  UV i, j, pxi, pyj, rindex;
  UV maxprod, nsum;

  /* With r == 0 there are no coefficients, and r-1 would wrap around. */
  if (r == 0)
    return;

  /* Determine max degree of px and py */
  for (degpx = r-1; degpx > 0 && !px[degpx]; degpx--) ; /* */
  for (degpy = r-1; degpy > 0 && !py[degpy]; degpy--) ; /* */

  /* nsum = how many products of two reduced coefficients can be added
   * together without overflowing a UV.  Each product is at most
   * (mod-1)^2.  When mod == 1 every product is zero, so any number of
   * them can be summed (this also avoids dividing by zero). */
  if (mod >= HALF_WORD) {
    nsum = 0;
  } else {
    maxprod = (mod-1) * (mod-1);
    nsum = (maxprod == 0) ? UV_MAX : (UV_MAX / maxprod);
  }

  /* One output coefficient receives at most min(degpx,degpy)+1 products,
   * so the delayed-reduction path is only safe when nsum is strictly
   * greater than one of the degrees. */
  if (nsum > degpx || nsum > degpy) {
    /* res will be written completely, so no need to set */
    for (rindex = 0; rindex < r; rindex++) {
      UV sum = 0;
      /* j walks backwards (with wrap-around at 0) so that i + j is
       * always congruent to rindex modulo r. */
      j = rindex;
      for (i = 0; i <= degpx; i++) {
        if (j <= degpy)
          sum += px[i] * py[j];
        j = (j == 0) ? r-1 : j-1;
      }
      res[rindex] = sum % mod;
    }
  } else {
    memset(res, 0, r * sizeof(UV));   /* Zero result accumulator */
    for (i = 0; i <= degpx; i++) {
      pxi = px[i];
      if (pxi == 0)
        continue;
      if (mod < HALF_WORD) {
        /* pxi*pyj + res[rindex] <= (mod-1)^2 + (mod-1) fits in a UV. */
        for (j = 0; j <= degpy; j++) {
          pyj = py[j];
          rindex = i+j;
          if (rindex >= r)
            rindex -= r;
          res[rindex] = (res[rindex] + (pxi*pyj)) % mod;
        }
      } else {
        /* Products may not fit in a UV: use the overflow-safe helper. */
        for (j = 0; j <= degpy; j++) {
          pyj = py[j];
          rindex = i+j;
          if (rindex >= r)
            rindex -= r;
          res[rindex] = muladdmod(pxi, pyj, res[rindex], mod);
        }
      }
    }
  }
  memcpy(px, res, r * sizeof(UV));    /* put result in px */
}
/* Bach and Sorenson (1993) would be better */
/*
 * Return 1 if n can be written as a^b with b >= 2, otherwise 0.
 * Values n <= 3 and n == UV_MAX are always reported as 0.
 *
 * Small enough n (as decided by the preprocessor test on DBL_DIG) use a
 * floating point root; larger n use an integer-only binary search for
 * the b-th root.
 */
static int is_perfect_power(UV n) {
  UV b, last;
  if ((n <= 3) || (n == UV_MAX)) return 0;
  if ((n & (n-1)) == 0)  return 1;          /* powers of 2    */
  /* Upper limit for the exponents tried below. */
  last = log2floor(n-1) + 1;
#if (BITS_PER_WORD == 32) || (DBL_DIG > 19)
  if (1) {
#elif DBL_DIG == 10
  if (n < UVCONST(10000000000)) {
#elif DBL_DIG == 15
  if (n < UVCONST(1000000000000000)) {
#else
  if ( n < (UV) pow(10, DBL_DIG) ) {
#endif
    /* Simple floating point method.  Fast, but need enough mantissa. */
    b = sqrt(n)+0.5;
    if (b*b == n)  return 1;                 /* perfect square */
    for (b = 3; b < last; b = _XS_next_prime(b)) {
      UV root = pow(n, 1.0 / (double)b) + 0.5;
      if ( ((UV)(pow(root, b)+0.5)) == n)  return 1;
    }
  } else {
    /* Dietzfelbinger, algorithm 2.3.5 (without optimized exponential) */
    for (b = 2; b <= last; b++) {
      UV a = 1;
      /* c is an exclusive upper bound on the b-th root.  Any root for
       * b >= 2 is below HALF_WORD (assuming HALF_WORD is
       * 2^(BITS_PER_WORD/2)), so clamp to that.  Repeatedly halving n
       * instead could push c down to or below the true square root for
       * n >= 2^(BITS_PER_WORD-1), making such squares undetectable. */
      UV c = (n < HALF_WORD) ? n : HALF_WORD;
      /* Invariant: a^b < n < c^b; stop when no integer lies between. */
      while ((c-a) >= 2) {
        UV m, maxm, p, i;
        m = (a+c) >> 1;
        maxm = UV_MAX / m;
        p = m;
        /* p = m^b, bailing out as "too big" before it can overflow. */
        for (i = 2; i <= b; i++) {
          if (p > maxm) { p = n+1; break; }
          p *= m;
        }
        if (p == n)  return 1;
        if (p < n)
          a = m;
        else
          c = m;
      }
    }
  }
  return 0;
}

/*
 * Return the smallest j in 1..limit for which n^j is congruent to 1
 * modulo r, or limit+1 if there is no such j in that range.
 * r must be non-zero.
 */
static UV order(UV r, UV n, UV limit) {
  UV j;
  UV t = 1;
  /* Reduce n once so each product below is at most (r-1)^2 rather than
   * (r-1)*n, which could wrap for large n. */
  n %= r;
  for (j = 1; j <= limit; j++) {
    t = (t * n) % r;
    if (t == 1)
      break;
  }
  return j;
}

/*
 * Print the non-zero terms of poly (r coefficients) to stdout, highest
 * exponent first, followed by a newline.
 */
static void poly_print(UV* poly, UV r)
{
  int i;
  for (i = r-1; i >= 1; i--) {
    /* Cast so the argument matches %lu whatever the width of UV is. */
    if (poly[i] != 0)
      printf("%lux^%d + ", (unsigned long)poly[i], i);
  }
  if (poly[0] != 0) printf("%lu", (unsigned long)poly[0]);
  printf("\n");
}

/*
 * Multiply the polynomials px and py, reducing exponents modulo r and
 * coefficients modulo mod.  The product is built in res (r entries,
 * must not overlap px) and then copied into px.
 *
 * NOTE(review): this file also contains another definition of
 * poly_mod_mul with the same signature.  Two static definitions of one
 * name cannot coexist in a translation unit, so one of them is
 * presumably stale -- confirm which should remain.
 */
static void poly_mod_mul(UV* px, UV* py, UV* res, UV r, UV mod)
{
  UV i, j, pxi, pyj, rindex;

  memset(res, 0, r * sizeof(UV));
  for (i = 0; i < r; i++) {
    pxi = px[i];
    if (pxi == 0)  continue;
    for (j = 0; j < r; j++) {
      pyj = py[j];
      if (pyj == 0)  continue;
      rindex = (i+j) < r ? i+j : i+j-r; /* (i+j) % r */
      if (mod < HALF_WORD) {
        /* Small modulus: the plain product is used directly. */
        res[rindex] = (res[rindex] + (pxi*pyj) ) % mod;
      } else {
        /* Large modulus: defer to the muladdmod helper. */
        res[rindex] = muladdmod(pxi, pyj, res[rindex], mod);
      }
    }
  }
  memcpy(px, res, r * sizeof(UV)); /* put result in px */
}