/* Run the exact-remainder recurrence over the limbs {src, size} for an odd
   divisor d, starting from the caller-supplied high value h.

   For each source limb, the pending borrow and the previous high product
   are subtracted from it, the difference is multiplied by the inverse of d
   obtained from modlimb_invert (and masked to GMP_NUMB_MASK), and the high
   limb of that value times d (pre-shifted by GMP_NAIL_BITS) becomes the h
   used by the next step.

   Returns the final h plus the last borrow.  size must be at least 1 and d
   must be odd.  */
mp_limb_t
mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t h)
{
  mp_limb_t limb, after_borrow, diff, d_inv, low_unused, d_shifted;
  mp_limb_t borrow_a, borrow_b;
  mp_limb_t borrow = 0;
  mp_size_t idx = 0;

  ASSERT (size >= 1);
  ASSERT (d & 1);

  modlimb_invert (d_inv, d);
  d_shifted = d << GMP_NAIL_BITS;

  while (idx < size)
    {
      /* The two partial borrows below can never both be set.  */
      ASSERT (borrow == 0 || borrow == 1);

      limb = src[idx];
      idx++;

      /* limb - borrow - h, collecting the borrow out of each subtraction.  */
      SUBC_LIMB (borrow_a, after_borrow, limb, borrow);
      SUBC_LIMB (borrow_b, diff, after_borrow, h);
      borrow = borrow_a + borrow_b;

      diff = (diff * d_inv) & GMP_NUMB_MASK;

      /* Only the high limb of the product is carried forward.  */
      umul_ppmm (h, low_unused, diff, d_shifted);
    }

  return h + borrow;
}
/* Check divide and conquer hensel division routine. */ void check_dc_bdiv_q (void) { mp_limb_t np[2*MAX_LIMBS]; mp_limb_t np2[2*MAX_LIMBS]; mp_limb_t rp[3*MAX_LIMBS]; mp_limb_t dp[MAX_LIMBS]; mp_limb_t qp[2*MAX_LIMBS]; mp_limb_t dip; mp_size_t nn, rn, dn, qn; gmp_randstate_t rands; int i, j, s; gmp_randinit_default(rands); for (i = 0; i < ITERS; i++) { dn = (random() % (MAX_LIMBS - 5)) + 6; nn = (random() % MAX_LIMBS) + dn; mpn_rrandom (np, rands, nn); mpn_rrandom (dp, rands, dn); dp[0] |= 1; MPN_COPY(np2, np, nn); modlimb_invert(dip, dp[0]); mpn_dc_bdiv_q(qp, np, nn, dp, dn, dip); if (nn >= dn) mpn_mul(rp, qp, nn, dp, dn); else mpn_mul(rp, dp, dn, qp, nn); if (mpn_cmp(rp, np2, nn) != 0) { printf("failed: quotient wrong!\n"); printf ("nn = %lu, dn = %lu\n\n", nn, dn); gmp_printf (" np: %Nx\n\n", np2, nn); gmp_printf (" dp: %Nx\n\n", dp, dn); gmp_printf (" qp: %Nx\n\n", qp, nn); gmp_printf (" rp: %Nx\n\n", rp, nn); abort (); } } gmp_randclear(rands); }
// basic divexact mp_limb_t divexact_basic(mp_ptr qp,mp_ptr xp,mp_size_t n,mp_limb_t d) {int j;mp_limb_t c,h,q,dummy,h1,t,m; ASSERT(n>0);ASSERT(d!=0);ASSERT_MPN(xp,n);ASSERT(MPN_SAME_OR_SEPARATE_P(qp,xp,n)); ASSERT(d%2==1);modlimb_invert(m,d); c=0;h=0;t=0; for(j=0;j<=n-1;j++) {h1=xp[j]; t=h+c;if(t>h1){h1=h1-t;c=1;}else{h1=h1-t;c=0;}// set borrow to c ; sbb t,h1 ; set c to borrow q=h1*m; qp[j]=q; umul_ppmm(h,dummy,q,d); ASSERT(dummy==h1);} // ie returns next quotient*-d return h+c;} // so (xp,n) = (qp,n)*d -ret*B^n and 0 <= ret < d
/* Check modlimb_invert for a single value n: the inverse it produces,
   multiplied by n and masked with GMP_NUMB_MASK, must be 1.  On a mismatch
   the input, the inverse and the product are traced and the program
   aborts.  */
void
one (mp_limb_t n)
{
  mp_limb_t n_inv, check;

  modlimb_invert (n_inv, n);
  check = (n_inv * n) & GMP_NUMB_MASK;

  if (check == 1)
    return;

  printf ("modlimb_invert wrong\n");
  mp_limb_trace (" n ", n);
  mp_limb_trace (" got ", n_inv);
  mp_limb_trace (" product ", check);
  abort ();
}
/* Hensel (least-significant-limb-first) division of {xp, n} by the odd
   limb d.  The n quotient limbs go to {qp, n}, which may be the same area
   as xp or fully separate.

   On return (xp, n) = (qp, n)*d - ret*B^n and 0 <= ret < d.  */
mp_limb_t
mpn_divrem_hensel_qr_1_1 (mp_ptr qp, mp_srcptr xp, mp_size_t n, mp_limb_t d)
{
  mp_size_t idx;
  mp_limb_t borrow = 0;		/* borrow out of the previous subtraction */
  mp_limb_t hi = 0;		/* high limb of the previous q*d */
  mp_limb_t d_inv, lo, limb, pending, quot;

  ASSERT (n > 0);
  ASSERT_MPN (xp, n);
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp, xp, n));
  ASSERT (d % 2 == 1);

  modlimb_invert (d_inv, d);

  for (idx = 0; idx < n; idx++)
    {
      limb = xp[idx];

      /* Subtract what the earlier limbs left over, noting the borrow.  */
      pending = hi + borrow;
      borrow = (pending > limb);
      limb -= pending;

      quot = limb * d_inv;
      qp[idx] = quot;

      /* The low limb of quot*d reproduces limb; the high limb carries on.  */
      umul_ppmm (hi, lo, quot, d);
      ASSERT (lo == limb);
    }

  return hi + borrow;
}
/* Exercise a series of mpn entry points, each with a tiny fixed input whose
   result is checked against a hard-coded expected value via ASSERT_ALWAYS.

   Every stanza first restores __gmpn_cpuvec from initial_cpuvec and then
   performs the same call twice.
   NOTE(review): presumably the first pass dispatches through the freshly
   restored vector and the second through whatever that call left
   installed -- confirm against the cpuvec initialisation code.

   Stanzas guarded by #if are compiled only when the named native routine
   exists or the limb configuration allows the check.  */
void
check_functions (void)
{
  mp_limb_t wp[2], wp2[2], xp[2], yp[2], r;
  int i;

  /* 123 + 456 = 579 */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 123;
      yp[0] = 456;
      mpn_add_n (wp, xp, yp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 579);
    }

  /* 456 + 2*123 = 702, no carry out */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 123;
      wp[0] = 456;
      r = mpn_addmul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(2));
      ASSERT_ALWAYS (wp[0] == 702);
      ASSERT_ALWAYS (r == 0);
    }

#if HAVE_NATIVE_mpn_copyd
  /* copy xp[0] up into xp[1] */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 123;
      xp[1] = 456;
      mpn_copyd (xp+1, xp, (mp_size_t) 1);
      ASSERT_ALWAYS (xp[1] == 123);
    }
#endif

#if HAVE_NATIVE_mpn_copyi
  /* copy xp[1] down into xp[0] */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 123;
      xp[1] = 456;
      mpn_copyi (xp, xp+1, (mp_size_t) 1);
      ASSERT_ALWAYS (xp[0] == 456);
    }
#endif

  /* 1605 / 5 = 321 */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 1605;
      mpn_divexact_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(5));
      ASSERT_ALWAYS (wp[0] == 321);
    }

  /* 1296 / 3 = 432, zero returned */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 1296;
      r = mpn_divexact_by3c (wp, xp, (mp_size_t) 1, CNST_LIMB(0));
      ASSERT_ALWAYS (wp[0] == 432);
      ASSERT_ALWAYS (r == 0);
    }

  /* 578 / 17 = 34; the last argument is the all-ones limb divided by 17 */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 578;
      r = mpn_divexact_byfobm1 (wp, xp, (mp_size_t) 1, CNST_LIMB(17),CNST_LIMB(-1)/CNST_LIMB(17));
      ASSERT_ALWAYS (wp[0] == 34);
      ASSERT_ALWAYS (r == 0);
    }

  /* 287 / 7 = 41 exactly; the quotient limb is expected in wp[1] and a
     zero limb in wp[0], with remainder 0 */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 287;
      r = mpn_divrem_1 (wp, (mp_size_t) 1, xp, (mp_size_t) 1, CNST_LIMB(7));
      ASSERT_ALWAYS (wp[1] == 41);
      ASSERT_ALWAYS (wp[0] == 0);
      ASSERT_ALWAYS (r == 0);
    }

  /* 290 = 41*7 + 3 */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 290;
      r = mpn_divrem_euclidean_qr_1 (wp, 0, xp, (mp_size_t) 1, CNST_LIMB(7));
      ASSERT_ALWAYS (wp[0] == 41);
      ASSERT_ALWAYS (r == 3);
    }

  /* gcd (12, 9) = 3 */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 12;
      r = mpn_gcd_1 (xp, (mp_size_t) 1, CNST_LIMB(9));
      ASSERT_ALWAYS (r == 3);
    }

  /* 0x1001 << 1 = 0x2002 */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 0x1001;
      mpn_lshift (wp, xp, (mp_size_t) 1, 1);
      ASSERT_ALWAYS (wp[0] == 0x2002);
    }

  /* 14 mod 4 = 2 */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 14;
      r = mpn_mod_1 (xp, (mp_size_t) 1, CNST_LIMB(4));
      ASSERT_ALWAYS (r == 2);
    }

#if (GMP_NUMB_BITS % 4) == 0
  /* The returned value is only compared after reducing it modulo
     2^(3*GMP_NUMB_BITS/4) - 1, so any value congruent to the input
     modulo that number passes.  */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      int bits = (GMP_NUMB_BITS / 4) * 3;
      mp_limb_t mod = (CNST_LIMB(1) << bits) - 1;
      mp_limb_t want = GMP_NUMB_MAX % mod;
      xp[0] = GMP_NUMB_MAX;
      r = mpn_mod_34lsub1 (xp, (mp_size_t) 1);
      ASSERT_ALWAYS (r % mod == want);
    }
#endif

  /* modexact_1c_odd is not exercised here; its line is left disabled. */
  // DECL_modexact_1c_odd ((*modexact_1c_odd));

  /* 14 * 4 = 56, no carry out */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 14;
      r = mpn_mul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(4));
      ASSERT_ALWAYS (wp[0] == 56);
      ASSERT_ALWAYS (r == 0);
    }

  /* 5 * 7 = 35 as a two-limb product */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 5;
      yp[0] = 7;
      mpn_mul_basecase (wp, xp, (mp_size_t) 1, yp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 35);
      ASSERT_ALWAYS (wp[1] == 0);
    }

#if HAVE_NATIVE_mpn_preinv_divrem_1 && GMP_NAIL_BITS == 0
  /* 0x101 divided by GMP_LIMB_HIGHBIT, using the inverse supplied by
     refmpn_invert_limb; wp[0] is expected to hold 0x202 and wp[1] zero */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 0x101;
      r = mpn_preinv_divrem_1 (wp, (mp_size_t) 1, xp, (mp_size_t) 1, GMP_LIMB_HIGHBIT, refmpn_invert_limb (GMP_LIMB_HIGHBIT), 0);
      ASSERT_ALWAYS (wp[0] == 0x202);
      ASSERT_ALWAYS (wp[1] == 0);
      ASSERT_ALWAYS (r == 0);
    }
#endif

#if GMP_NAIL_BITS == 0
  /* (GMP_LIMB_HIGHBIT + 123) mod GMP_LIMB_HIGHBIT = 123 */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = GMP_LIMB_HIGHBIT+123;
      r = mpn_preinv_mod_1 (xp, (mp_size_t) 1, GMP_LIMB_HIGHBIT, refmpn_invert_limb (GMP_LIMB_HIGHBIT));
      ASSERT_ALWAYS (r == 123);
    }
#endif

  /* One-limb mpn_redc_1 with modulus 5 on the two-limb input {43, 75};
     r is the negated modlimb_invert of the modulus limb.  */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 5;
      modlimb_invert(r,xp[0]);
      r=-r;
      yp[0]=43;
      yp[1]=75;
      mpn_redc_1 (wp, yp, xp, (mp_size_t) 1,r);
      ASSERT_ALWAYS (wp[0] == 78);
    }

  /* 5 + 3 = 8 into wp, 5 - 3 = 2 into wp2 */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0]=5;
      yp[0]=3;
      mpn_sumdiff_n (wp, wp2,xp, yp,1);
      ASSERT_ALWAYS (wp[0] == 8);
      ASSERT_ALWAYS (wp2[0] == 2);
    }

  /* 0x8008 >> 1 = 0x4004 */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 0x8008;
      mpn_rshift (wp, xp, (mp_size_t) 1, 1);
      ASSERT_ALWAYS (wp[0] == 0x4004);
    }

  /* 5^2 = 25 as a two-limb square */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 5;
      mpn_sqr_basecase (wp, xp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 25);
      ASSERT_ALWAYS (wp[1] == 0);
    }

  /* 999 - 666 = 333 */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 999;
      yp[0] = 666;
      mpn_sub_n (wp, xp, yp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 333);
    }

  /* 456 - 2*123 = 210, no borrow out */
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 123;
      wp[0] = 456;
      r = mpn_submul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(2));
      ASSERT_ALWAYS (wp[0] == 210);
      ASSERT_ALWAYS (r == 0);
    }
}
/* Exact-remainder style reduction of {src, size} by the odd limb d, with an
   initial carry orig_c.

   A single-limb operand is handled with a plain % d.  Otherwise every limb
   except the top one goes through a subtract / multiply-by-inverse /
   high-product step, and the top limb is finished either with a cheap
   subtract-and-addback (when it does not exceed d) or with one more full
   step.

   The closing assertion states the range of the result: below d when
   orig_c < d, and at most d otherwise.  */
mp_limb_t
mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d,
		     mp_limb_t orig_c)
{
  mp_limb_t limb, hi, lo, d_inv, low_unused, d_shifted, result;
  mp_limb_t carry = orig_c;
  mp_size_t idx;

  ASSERT (size >= 1);
  ASSERT (d & 1);
  ASSERT_MPN (src, size);
  ASSERT_LIMB (d);
  ASSERT_LIMB (carry);

  if (size == 1)
    {
      limb = src[0];
      if (limb <= carry)
	return (carry - limb) % d;

      /* limb > carry: reduce the positive difference and flip it to d minus
         the residue unless the residue is zero.  */
      hi = (limb - carry) % d;
      return (hi != 0) ? d - hi : hi;
    }

  modlimb_invert (d_inv, d);
  d_shifted = d << GMP_NAIL_BITS;

  /* size >= 2 here, so this runs at least once and stops at the top limb.  */
  for (idx = 0; idx < size - 1; idx++)
    {
      limb = src[idx];
      SUBC_LIMB (carry, lo, limb, carry);
      lo = (lo * d_inv) & GMP_NUMB_MASK;
      umul_ppmm (hi, low_unused, lo, d_shifted);
      carry += hi;
    }

  limb = src[idx];
  if (limb > d)
    {
      /* Can't skip a divide, just do the loop code once more.  */
      SUBC_LIMB (carry, lo, limb, carry);
      lo = (lo * d_inv) & GMP_NUMB_MASK;
      umul_ppmm (hi, low_unused, lo, d_shifted);
      carry += hi;
      result = carry;
    }
  else
    {
      /* With high<=d the final step can be a subtract and addback.  If
         the carry is 0 the addback restores a non-negative value.  If the
         carry equals d and the top limb is 0 the result is d, which is
         allowed by the function definition.  */
      lo = carry - limb;
      if (carry < limb)
	lo += d;
      result = lo;
    }

  ASSERT (orig_c < d ? result < d : result <= d);
  return result;
}
/* For now, also disable REDC when MOD is even, as the inverse can't handle that. At some point, we might want to make the code faster for that case, perhaps using CRR. */ #ifndef POWM_THRESHOLD #define POWM_THRESHOLD ((8 * SQR_KARATSUBA_THRESHOLD) / 3) #endif #define HANDLE_NEGATIVE_EXPONENT 1 #undef REDUCE_EXPONENT void #ifndef BERKELEY_MP mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m) #else /* BERKELEY_MP */ pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r) #endif /* BERKELEY_MP */ { mp_ptr xp, tp, qp, gp, this_gp; mp_srcptr bp, ep, mp; mp_size_t bn, es, en, mn, xn; mp_limb_t invm, c; unsigned long int enb; mp_size_t i, K, j, l, k; int m_zero_cnt, e_zero_cnt; int sh; int use_redc; #if HANDLE_NEGATIVE_EXPONENT mpz_t new_b; #endif #if REDUCE_EXPONENT mpz_t new_e; #endif TMP_DECL (marker); mp = PTR(m); mn = ABSIZ (m); if (mn == 0) DIVIDE_BY_ZERO; TMP_MARK (marker); es = SIZ (e); if (es <= 0) { if (es == 0) { /* Exponent is zero, result is 1 mod m, i.e., 1 or 0 depending on if m equals 1. */ SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1; PTR(r)[0] = 1; TMP_FREE (marker); /* we haven't really allocated anything here */ return; } #if HANDLE_NEGATIVE_EXPONENT MPZ_TMP_INIT (new_b, mn + 1); if (! mpz_invert (new_b, b, m)) DIVIDE_BY_ZERO; b = new_b; es = -es; #else DIVIDE_BY_ZERO; #endif } en = es; #if REDUCE_EXPONENT /* Reduce exponent by dividing it by phi(m) when m small. */ if (mn == 1 && mp[0] < 0x7fffffffL && en * GMP_NUMB_BITS > 150) { MPZ_TMP_INIT (new_e, 2); mpz_mod_ui (new_e, e, phi (mp[0])); e = new_e; } #endif use_redc = mn < POWM_THRESHOLD && mp[0] % 2 != 0; if (use_redc) { /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */ modlimb_invert (invm, mp[0]); invm = -invm; } else { /* Normalize m (i.e. make its most significant bit set) as required by division functions below. 
*/ count_leading_zeros (m_zero_cnt, mp[mn - 1]); m_zero_cnt -= GMP_NAIL_BITS; if (m_zero_cnt != 0) { mp_ptr new_mp; new_mp = TMP_ALLOC_LIMBS (mn); mpn_lshift (new_mp, mp, mn, m_zero_cnt); mp = new_mp; } } /* Determine optimal value of k, the number of exponent bits we look at at a time. */ count_leading_zeros (e_zero_cnt, PTR(e)[en - 1]); e_zero_cnt -= GMP_NAIL_BITS; enb = en * GMP_NUMB_BITS - e_zero_cnt; /* number of bits of exponent */ k = 1; K = 2; while (2 * enb > K * (2 + k * (3 + k))) { k++; K *= 2; } tp = TMP_ALLOC_LIMBS (2 * mn + 1); qp = TMP_ALLOC_LIMBS (mn + 1); gp = __GMP_ALLOCATE_FUNC_LIMBS (K / 2 * mn); /* Compute x*R^n where R=2^BITS_PER_MP_LIMB. */ bn = ABSIZ (b); bp = PTR(b); /* Handle |b| >= m by computing b mod m. FIXME: It is not strictly necessary for speed or correctness to do this when b and m have the same number of limbs, perhaps remove mpn_cmp call. */ if (bn > mn || (bn == mn && mpn_cmp (bp, mp, mn) >= 0)) { /* Reduce possibly huge base while moving it to gp[0]. Use a function call to reduce, since we don't want the quotient allocation to live until function return. */ if (use_redc) { reduce (tp + mn, bp, bn, mp, mn); /* b mod m */ MPN_ZERO (tp, mn); mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); /* unnormnalized! */ } else { reduce (gp, bp, bn, mp, mn); } } else { /* |b| < m. We pad out operands to become mn limbs, which simplifies the rest of the function, but slows things down when the |b| << m. */ if (use_redc) { MPN_ZERO (tp, mn); MPN_COPY (tp + mn, bp, bn); MPN_ZERO (tp + mn + bn, mn - bn); mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); } else { MPN_COPY (gp, bp, bn); MPN_ZERO (gp + bn, mn - bn); } } /* Compute xx^i for odd g < 2^i. 
*/ xp = TMP_ALLOC_LIMBS (mn); mpn_sqr_n (tp, gp, mn); if (use_redc) redc (xp, mp, mn, invm, tp); /* xx = x^2*R^n */ else mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); this_gp = gp; for (i = 1; i < K / 2; i++) { mpn_mul_n (tp, this_gp, xp, mn); this_gp += mn; if (use_redc) redc (this_gp, mp, mn, invm, tp); /* g[i] = x^(2i+1)*R^n */ else mpn_tdiv_qr (qp, this_gp, 0L, tp, 2 * mn, mp, mn); } /* Start the real stuff. */ ep = PTR (e); i = en - 1; /* current index */ c = ep[i]; /* current limb */ sh = GMP_NUMB_BITS - e_zero_cnt; /* significant bits in ep[i] */ sh -= k; /* index of lower bit of ep[i] to take into account */ if (sh < 0) { /* k-sh extra bits are needed */ if (i > 0) { i--; c <<= (-sh); sh += GMP_NUMB_BITS; c |= ep[i] >> sh; } }
/* Compare mpn_redc_1 against ref_redc_1 on random inputs.

   Two passes are made over all sizes n = 1..99 with 99 repetitions each:
   the first pass draws operands with mpn_randomb, the second with
   mpn_rrandom.  The modulus is forced odd, its low limb is inverted with
   modlimb_invert and negated, and both routines are run on identical
   copies of a 2n-limb input.  Differing n-limb results abort the
   program.  */
int
main (void)
{
  gmp_randstate_t rands;
  int pass, rep, n;
  mp_limb_t cp1[1000], cp2[1000], mp[1000], tp1[1000], tp2[1000], inv;

  tests_start ();
  gmp_randinit_default (rands);

  for (pass = 0; pass < 2; pass++)
    {
      for (n = 1; n < 100; n++)
	{
	  for (rep = 1; rep < 100; rep++)
	    {
	      /* Random odd modulus.  */
	      if (pass == 0)
		mpn_randomb (mp, rands, n);
	      else
		mpn_rrandom (mp, rands, n);
	      mp[0] |= 1;

	      modlimb_invert (inv, mp[0]);
	      inv = -inv;

	      /* Random 2n-limb input, duplicated so each routine gets its
	         own working copy.  */
	      if (pass == 0)
		mpn_randomb (tp1, rands, 2 * n);
	      else
		mpn_rrandom (tp1, rands, 2 * n);
	      MPN_COPY (tp2, tp1, 2 * n);

	      ref_redc_1 (cp1, tp1, mp, n, inv);
	      mpn_redc_1 (cp2, tp2, mp, n, inv);

	      if (mpn_cmp (cp1, cp2, n) != 0)
		{
		  printf ("mpn_redc_1 error %d\n", n);
		  abort ();
		}

	      /* The working areas are not required to end up equal, but
	         comparing them could be a useful extra check.  */
	      if (n != 1 && mpn_cmp (tp1, tp2, 2 * n) != 0)
		{
		  printf ("mpn_redc_1 possible error\n");
		  abort ();
		}
	    }
	}
    }

  gmp_randclear (rands);
  tests_end ();
  exit (0);
}