/* Montgomery reduction (REDC) for large moduli: rp <- up / B^n mod mp,
   where up has 2n limbs, mp is the n-limb modulus, and ip is the
   n-limb inverse of mp mod B^n (as produced by mpn_binvert — note the
   subtraction below implies ip = +1/mp, not -1/mp; NOTE(review):
   confirm against callers).  Uses a wrap-around multiplication mod
   B^rn - 1 to form the product xp * mp cheaply.  */
void
mpn_redc_n (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr ip)
{
  mp_ptr xp, yp, scratch;
  mp_limb_t cy;
  mp_size_t rn;
  TMP_DECL;
  TMP_MARK;

  ASSERT (n > 8);

  /* Size for the mulmod-(B^rn - 1) product; rn >= n.  */
  rn = mpn_mulmod_bnm1_next_size (n);

  scratch = TMP_ALLOC_LIMBS (n + rn + mpn_mulmod_bnm1_itch (rn, n, n));

  /* Montgomery quotient: xp = up * ip mod B^n.  */
  xp = scratch;
  mpn_mullo_n (xp, up, ip, n);

  /* yp = xp * mp mod (B^rn - 1); the true 2n-limb product wrapped.  */
  yp = scratch + n;
  mpn_mulmod_bnm1 (yp, rn, xp, n, mp, n, scratch + n + rn);

  ASSERT_ALWAYS (2 * n > rn);		/* could handle this */

  /* Undo the wrap-around: the high 2n-rn limbs of xp*mp were folded
     into yp's low part.  By construction xp*mp == up (mod B^n), so the
     folded-in amount equals the corresponding limbs of up.  */
  cy = mpn_sub_n (yp + rn, yp, up, 2*n - rn); /* undo wrap around */
  MPN_DECR_U (yp + 2*n - rn, rn, cy);

  /* rp = (up - xp*mp) / B^n; a borrow means the result went negative
     mod B^n, so add back one modulus.  */
  cy = mpn_sub_n (rp, up + n, yp + n, n);
  if (cy != 0)
    mpn_add_n (rp, rp, mp, n);
  TMP_FREE;
}
/* Divide-and-conquer Hensel (binary) division, quotient only:
   compute qp such that qp * dp == np (mod B^n).  dp must be odd;
   dinv is the precomputed limb inverse expected by the sbpi1 bdiv
   routines (see gmp-impl.h — TODO confirm exact convention).
   np is clobbered; tp is scratch.  */
void
mpn_dcpi1_bdiv_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
		    mp_limb_t dinv, mp_ptr tp)
{
  while (ABOVE_THRESHOLD (n, DC_BDIV_Q_THRESHOLD))
    {
      mp_size_t lo, hi;
      mp_limb_t cy;

      lo = n >> 1;			/* floor(n/2) */
      hi = n - lo;			/* ceil(n/2) */

      /* Low half of the quotient, updating np with the remainder.  */
      cy = mpn_dcpi1_bdiv_qr_n (qp, np, dp, lo, dinv, tp);

      /* Subtract q_lo * d_high (low lo limbs) from the numerator.  */
      mpn_mullo_n (tp, qp, dp + hi, lo);
      mpn_sub_n (np + hi, np + hi, tp, lo);

      if (lo < hi)
	{
	  /* Odd n: one extra cross-product limb, then fold the
	     accumulated borrow into the top numerator limb.  */
	  cy += mpn_submul_1 (np + lo, qp, lo, dp[lo]);
	  np[n - 1] -= cy;
	}
      qp += lo;
      np += lo;
      n -= lo;
    }
  /* Base case: schoolbook Hensel division.  */
  mpn_sbpi1_bdiv_q (qp, np, n, dp, n, dinv);
}
/* Compute r such that r^2 * y = 1 (mod 2^{b+1}).

   Return non-zero if such an integer r exists.

   Iterates
     r' <-- (3r - r^3 y) / 2
   using Hensel lifting.  Since we divide by two, the Hensel lifting is
   somewhat degenerate; therefore, we lift from 2^b to 2^{b+1}-1.

   FIXME: (1) Simplify to do precision book-keeping in limbs rather
   than bits.  (2) Rewrite iteration as r' <-- r - r (r^2 y - 1) / 2
   and take advantage of the zero low part of r^2 y - 1.  (3) Use
   wrap-around trick.  (4) Use a small table to get the starting
   value.  */
int
mpn_bsqrtinv (mp_ptr rp, mp_srcptr yp, mp_bitcnt_t bnb, mp_ptr tp)
{
  mp_ptr tp2, tp3;
  mp_limb_t k;
  mp_size_t bn, order[GMP_LIMB_BITS + 1];
  int i, d;

  ASSERT (bnb > 0);

  /* Limbs needed for bnb bits (one extra when bnb is a multiple).  */
  bn = 1 + bnb / GMP_LIMB_BITS;

  tp2 = tp + bn;
  tp3 = tp + 2 * bn;
  k = 3;				/* exponent for r^3 via mpn_powlo */

  rp[0] = 1;
  if (bnb == 1)
    {
      /* An inverse square root mod 8 exists only for y == 1 (mod 4).  */
      if ((yp[0] & 3) != 1)
	return 0;
    }
  else
    {
      /* For larger precision we need y == 1 (mod 8).  */
      if ((yp[0] & 7) != 1)
	return 0;

      /* Record the target bit precision of each lifting step, then run
	 the steps from lowest precision to highest.  */
      d = 0;
      for (; bnb != 2; bnb = (bnb + 2) >> 1)
	order[d++] = bnb;

      for (i = d - 1; i >= 0; i--)
	{
	  bnb = order[i];
	  bn = 1 + bnb / GMP_LIMB_BITS;

	  mpn_mul_1 (tp, rp, bn, k);		/* tp = 3r */
	  mpn_powlo (tp2, rp, &k, 1, bn, tp3);	/* tp2 = r^3 */
	  mpn_mullo_n (rp, yp, tp2, bn);	/* rp = r^3 y */

	  /* r' = (3r - r^3 y) / 2 */
#if HAVE_NATIVE_mpn_rsh1sub_n
	  mpn_rsh1sub_n (rp, tp, rp, bn);
#else
	  mpn_sub_n (tp2, tp, rp, bn);
	  mpn_rshift (rp, tp2, bn, 1);
#endif
	}
    }
  return 1;
}
/* rp <- sqrt(ap) mod B^(nb/GMP_NUMB_BITS), computed as
   ap * bsqrtinv(ap), since a * a^{-1/2} = a^{1/2}.
   tp is scratch; the low limbs hold the inverse square root.  */
void
mpn_bsqrt (mp_ptr rp, mp_srcptr ap, mp_bitcnt_t nb, mp_ptr tp)
{
  mp_size_t limbs;
  mp_ptr inv_scratch;

  ASSERT (nb > 0);

  limbs = nb / GMP_NUMB_BITS;
  inv_scratch = tp + limbs;

  /* tp <- a^{-1/2} (mod 2^{nb+1}), then multiply back by a.  */
  mpn_bsqrtinv (tp, ap, nb, inv_scratch);
  mpn_mullo_n (rp, tp, ap, limbs);
}
/* Computes a^{1/k} (mod B^n).  Both a and k must be odd.  */
void
mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
{
  mp_ptr inv;
  TMP_DECL;

  ASSERT (n > 0);
  ASSERT (ap[0] & 1);
  ASSERT (k & 1);

  /* Trivial root: a^{1/1} = a.  */
  if (k == 1)
    {
      MPN_COPY (rp, ap, n);
      return;
    }

  TMP_MARK;
  inv = TMP_ALLOC_LIMBS (n);

  /* inv = a^{1/k - 1}, hence a * inv = a^{1/k}.  */
  mpn_broot_invm1 (inv, ap, n, k);
  mpn_mullo_n (rp, inv, ap, n);

  TMP_FREE;
}
/* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.

   Iterates
     r' <-- r - r * (a^{k-1} r^k - 1) / n
   If a^{k-1} r^k = 1 (mod 2^m), then a^{k-1} r'^k = 1 (mod 2^{2m}).

   Compute the update term as
     r' = r - (a^{k-1} r^{k+1} - r) / k
   where we still have cancellation of low limbs.  */
void
mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
{
  mp_size_t sizes[GMP_LIMB_BITS * 2];
  mp_ptr akm1, tp, rnp, ep;
  mp_limb_t a0, r0, km1, kp1h, kinv;
  mp_size_t rn;
  unsigned i;

  TMP_DECL;

  ASSERT (n > 0);
  ASSERT (ap[0] & 1);
  ASSERT (k & 1);
  ASSERT (k >= 3);

  TMP_MARK;

  akm1 = TMP_ALLOC_LIMBS (4*n);
  tp = akm1 + n;

  km1 = k-1;
  /* FIXME: Could arrange the iteration so we don't need to compute this
     up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note that we
     can use wraparound also for a*r, since the low half is unchanged
     from the previous iteration. Or possibly mulmid. Also, a r =
     a^{1/k}, so we get that value too, for free? */
  mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */

  a0 = ap[0];
  binvert_limb (kinv, k);

  /* 4 bits: a^{1/k - 1} (mod 16):

	a % 8
	1 3 5 7
   k%4 +-------
     1 |1 1 1 1
     3 |1 9 9 1
  */
  r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
  /* Double the number of correct low bits at each step.  */
  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
  r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
#if GMP_NUMB_BITS > 32
  {
    unsigned prec = 32;
    do
      {
	r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
	prec *= 2;
      }
    while (prec < GMP_NUMB_BITS);
  }
#endif

  rp[0] = r0;
  if (n == 1)
    {
      TMP_FREE;
      return;
    }

  /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
  kp1h = k/2 + 1;

  /* FIXME: Special case for two limb iteration. */
  rnp = TMP_ALLOC_LIMBS (2*n + 1);
  ep = rnp + n;

  /* Lifting schedule: sizes[] holds the limb count of each step,
     largest first; we consume it smallest-to-largest below.
     FIXME: Possible to this on the fly with some bit fiddling. */
  for (i = 0; n > 1; n = (n + 1)/2)
    sizes[i++] = n;

  rn = 1;				/* limbs of r known correct */

  while (i-- > 0)
    {
      /* Compute x^{k+1}. */
      mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
			       final iteration. */
      mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);

      /* Multiply by a^{k-1}. Can use wraparound; low part equals r. */
      mpn_mullo_n (ep, rnp, akm1, sizes[i]);
      ASSERT (mpn_cmp (ep, rp, rn) == 0);

      ASSERT (sizes[i] <= 2*rn);
      /* High limbs of the update term, divided exactly by k, negated
	 to realize r' = r - (...)/k.  */
      mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
      mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
      rn = sizes[i];
    }
  TMP_FREE;
}
int main (int argc, char **argv) { gmp_randstate_ptr rands; mp_ptr ap, rp, pp, app, scratch; int count = COUNT; unsigned i; TMP_DECL; TMP_MARK; if (argc > 1) { char *end; count = strtol (argv[1], &end, 0); if (*end || count <= 0) { fprintf (stderr, "Invalid test count: %s.\n", argv[1]); return 1; } } tests_start (); rands = RANDS; ap = TMP_ALLOC_LIMBS (MAX_LIMBS); rp = TMP_ALLOC_LIMBS (MAX_LIMBS); pp = TMP_ALLOC_LIMBS (MAX_LIMBS); app = TMP_ALLOC_LIMBS (MAX_LIMBS); scratch = TMP_ALLOC_LIMBS (5*MAX_LIMBS); for (i = 0; i < count; i++) { mp_size_t n; mp_limb_t k; n = 1 + gmp_urandomm_ui (rands, MAX_LIMBS); if (i & 1) mpn_random2 (ap, n); else mpn_random (ap, n); ap[0] |= 1; if (i < 100) k = 3 + 2*i; else { mpn_random (&k, 1); if (k < 3) k = 3; else k |= 1; } mpn_brootinv (rp, ap, n, k, scratch); mpn_powlo (pp, rp, &k, 1, n, scratch); mpn_mullo_n (app, ap, pp, n); if (app[0] != 1 || !(n == 1 || mpn_zero_p (app+1, n-1))) { gmp_fprintf (stderr, "mpn_brootinv returned bad result: %u limbs\n", (unsigned) n); gmp_fprintf (stderr, "k = %Mx\n", k); gmp_fprintf (stderr, "a = %Nx\n", ap, n); gmp_fprintf (stderr, "r = %Nx\n", rp, n); gmp_fprintf (stderr, "r^n = %Nx\n", pp, n); gmp_fprintf (stderr, "a r^n = %Nx\n", app, n); abort (); } } TMP_FREE; tests_end (); return 0; }
/* Compute r = y^{-1/k} (mod B^bn), i.e. r such that y * r^k == 1
   (mod B^bn).  y must be odd and k odd.  tp is scratch — assumes at
   least 3*bn limbs plus what mpn_powlo needs; TODO confirm exact
   requirement against callers.  */
void
mpn_brootinv (mp_ptr rp, mp_srcptr yp, mp_size_t bn, mp_limb_t k, mp_ptr tp)
{
  mp_ptr tp2, tp3;
  mp_limb_t kinv, k2, r0, y0;
  mp_size_t order[GMP_LIMB_BITS + 1];
  int i, d;

  ASSERT (bn > 0);
  ASSERT ((k & 1) != 0);

  tp2 = tp + bn;
  tp3 = tp + 2 * bn;
  k2 = k + 1;				/* exponent used in the iteration */

  binvert_limb (kinv, k);		/* 1/k mod B, for the exact division */

  /* 4-bit initial approximation:

   y%16 | 1  3  5  7  9 11 13 15,
    k%4 +-------------------------+k2%4
     1  | 1 11 13  7  9  3  5 15  |  2
     3  | 1  3  5  7  9 11 13 15  |  0

  */
  y0 = yp[0];

  r0 = y0 ^ (((y0 << 1) ^ (y0 << 2)) & (k2 << 2) & 8);		/* 4 bits */
  /* Each Newton step doubles the number of correct low bits.  */
  r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0x7f));		/* 8 bits */
  r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0x7fff));	/* 16 bits */
#if GMP_NUMB_BITS > 16
  {
    unsigned prec = 16;
    do
      {
	r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2));
	prec *= 2;
      }
    while (prec < GMP_NUMB_BITS);
  }
#endif

  rp[0] = r0;
  if (bn == 1)
    return;

  /* This initialization doesn't matter for the result (any garbage is
     cancelled in the iteration), but proper initialization makes
     valgrind happier. */
  MPN_ZERO (rp+1, bn-1);

  /* Record the limb count of each lifting step (largest first), then
     lift from smallest to largest precision.  */
  d = 0;
  for (; bn > 1; bn = (bn + 1) >> 1)
    order[d++] = bn;

  for (i = d - 1; i >= 0; i--)
    {
      bn = order[i];

      /* r' = ((k+1) r - y r^{k+1}) / k  */
      mpn_mul_1 (tp, rp, bn, k2);		/* tp = (k+1) r */
      mpn_powlo (tp2, rp, &k2, 1, bn, tp3);	/* tp2 = r^{k+1} */
      mpn_mullo_n (rp, yp, tp2, bn);		/* rp = y r^{k+1} */

      mpn_sub_n (tp2, tp, rp, bn);
      mpn_pi1_bdiv_q_1 (rp, tp2, bn, k, kinv, 0); /* exact division by k */
    }
}
int main (int argc, char **argv) { mp_ptr ap, bp, refp, pp, scratch; int count = COUNT; int test; gmp_randstate_ptr rands; TMP_DECL; TMP_MARK; if (argc > 1) { char *end; count = strtol (argv[1], &end, 0); if (*end || count <= 0) { fprintf (stderr, "Invalid test count: %s.\n", argv[1]); return 1; } } tests_start (); rands = RANDS; #define mpn_mullo_itch(n) (0) ap = TMP_ALLOC_LIMBS (MAX_N); bp = TMP_ALLOC_LIMBS (MAX_N); refp = TMP_ALLOC_LIMBS (MAX_N * 2); pp = 1+TMP_ALLOC_LIMBS (MAX_N + 2); scratch = 1+TMP_ALLOC_LIMBS (mpn_mullo_itch (MAX_N) + 2); for (test = 0; test < count; test++) { unsigned size_min; unsigned size_range; mp_size_t n; mp_size_t itch; mp_limb_t p_before, p_after, s_before, s_after; for (size_min = 1; (1L << size_min) < MIN_N; size_min++) ; /* We generate an in the MIN_N <= n <= (1 << size_range). */ size_range = size_min + gmp_urandomm_ui (rands, SIZE_LOG + 1 - size_min); n = MIN_N + gmp_urandomm_ui (rands, (1L << size_range) + 1 - MIN_N); mpn_random2 (ap, n); mpn_random2 (bp, n); mpn_random2 (pp-1, n + 2); p_before = pp[-1]; p_after = pp[n]; itch = mpn_mullo_itch (n); ASSERT_ALWAYS (itch <= mpn_mullo_itch (MAX_N)); mpn_random2 (scratch-1, itch+2); s_before = scratch[-1]; s_after = scratch[itch]; mpn_mullo_n (pp, ap, bp, n); mpn_mul_n (refp, ap, bp, n); if (pp[-1] != p_before || pp[n] != p_after || scratch[-1] != s_before || scratch[itch] != s_after || mpn_cmp (refp, pp, n) != 0) { printf ("ERROR in test %d, n = %d", test, (int) n); if (pp[-1] != p_before) { printf ("before pp:"); mpn_dump (pp -1, 1); printf ("keep: "); mpn_dump (&p_before, 1); } if (pp[n] != p_after) { printf ("after pp:"); mpn_dump (pp + n, 1); printf ("keep: "); mpn_dump (&p_after, 1); } if (scratch[-1] != s_before) { printf ("before scratch:"); mpn_dump (scratch-1, 1); printf ("keep: "); mpn_dump (&s_before, 1); } if (scratch[itch] != s_after) { printf ("after scratch:"); mpn_dump (scratch + itch, 1); printf ("keep: "); mpn_dump (&s_after, 1); } mpn_dump (ap, n); mpn_dump (bp, 
n); mpn_dump (pp, n); mpn_dump (refp, n); abort(); } } TMP_FREE; tests_end (); return 0; }