void compute_off_adj(QS_t * qs_inf, poly_t * poly_inf)
{
   unsigned long num_primes = qs_inf->num_primes;
   unsigned long * A = poly_inf->A;
   unsigned long * B = poly_inf->B;
   uint32_t * A_inv = poly_inf->A_inv;
   uint32_t ** A_inv2B = poly_inf->A_inv2B;
   unsigned long * B_terms = poly_inf->B_terms;
   uint32_t * soln1 = poly_inf->soln1;
   uint32_t * soln2 = poly_inf->soln2;
   uint32_t * sqrts = qs_inf->sqrts;
   prime_t * factor_base = qs_inf->factor_base;
   unsigned long sieve_size = qs_inf->sieve_size;
   unsigned long s = poly_inf->s;
   unsigned long p, temp;
   unsigned limbs = qs_inf->prec+1;
   double pinv;

   for (unsigned long i = 2; i < num_primes; i++) // skip k and 2
   {
      p = factor_base[i].p;
      pinv = factor_base[i].pinv;

      A_inv[i] = z_invert(mpn_mod_1(A+1, A[0], p), p);

      for (unsigned long j = 0; j < s; j++)
      {
         temp = mpn_mod_1(B_terms + j*limbs + 1, B_terms[j*limbs], p);
         temp = z_mulmod_precomp(temp, A_inv[i], p, pinv);
         temp *= 2;
         if (temp >= p) temp -= p;
         A_inv2B[j][i] = temp;
      }

      temp = mpn_mod_1(B+1, B[0], p);
      temp = sqrts[i] + p - temp;
#if FLINT_BITS == 64
      temp *= A_inv[i];
#else
      temp = z_mulmod2_precomp(temp, A_inv[i], p, pinv);
#endif
      temp += sieve_size/2;
      soln1[i] = z_mod2_precomp(temp, p, pinv); // Consider using long_mod_precomp

      temp = p - sqrts[i];
      if (temp == p) temp -= p;
      temp = z_mulmod_precomp(temp, A_inv[i], p, pinv);
      temp *= 2;
      if (temp >= p) temp -= p;
      soln2[i] = temp + soln1[i];
      if (soln2[i] >= p) soln2[i] -= p;
   }
}
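Reading the arithmetic off the loop above (a sketch of the intent, not taken from FLINT's own documentation): for each factor-base prime p_i the two stored offsets are the roots of the current MPQS polynomial, shifted by half the sieve interval M = sieve_size,

$$\mathrm{soln}_{1,2}[i] \;\equiv\; A^{-1}\bigl(\pm\,\texttt{sqrts[i]} - B\bigr) + \tfrac{M}{2} \pmod{p_i},$$

where sqrts[i] is a square root of the (multiplier-adjusted) n modulo p_i. Equivalently, A(x - M/2) + B is congruent to one of the two square roots of n modulo p_i at x = soln1[i] and x = soln2[i], so the polynomial value (A(x - M/2) + B)^2 - n is divisible by p_i exactly at those sieve positions and at their translates by p_i.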
static Py_hash_t
Pympq_hash(PympqObject *self)
{
#ifdef _PyHASH_MODULUS
    Py_hash_t hash = 0;
    mpz_t temp, temp1, mask;

    if (self->hash_cache != -1)
        return self->hash_cache;

    mpz_inoc(temp);
    mpz_inoc(temp1);
    mpz_inoc(mask);
    mpz_set_si(mask, 1);
    mpz_mul_2exp(mask, mask, _PyHASH_BITS);
    mpz_sub_ui(mask, mask, 1);

    if (!mpz_invert(temp, mpq_denref(self->q), mask)) {
        mpz_cloc(temp);
        mpz_cloc(temp1);
        mpz_cloc(mask);
        hash = _PyHASH_INF;
        if (mpz_sgn(mpq_numref(self->q)) < 0)
            hash = -hash;
        self->hash_cache = hash;
        return hash;
    }
    mpz_set(temp1, mask);
    mpz_sub_ui(temp1, temp1, 2);
    mpz_powm(temp, mpq_denref(self->q), temp1, mask);

    mpz_tdiv_r(temp1, mpq_numref(self->q), mask);
    mpz_mul(temp, temp, temp1);
    hash = (Py_hash_t)mpn_mod_1(temp->_mp_d, mpz_size(temp), _PyHASH_MODULUS);

    if (mpz_sgn(mpq_numref(self->q)) < 0)
        hash = -hash;
    if (hash == -1)
        hash = -2;
    mpz_cloc(temp);
    mpz_cloc(temp1);
    mpz_cloc(mask);
    self->hash_cache = hash;
    return hash;
#else
    PyObject *temp;

    if (self->hash_cache != -1)
        return self->hash_cache;

    if (!(temp = Pympq_To_PyFloat(self))) {
        SYSTEM_ERROR("Could not convert 'mpq' to float.");
        return -1;
    }
    self->hash_cache = PyObject_Hash(temp);
    Py_DECREF(temp);
    return self->hash_cache;
#endif
}
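For reference, the branch above implements CPython's rational hash: with P = 2^_PyHASH_BITS - 1 (a Mersenne prime, 2^61 - 1 on 64-bit builds and 2^31 - 1 on 32-bit builds, equal to _PyHASH_MODULUS),

$$\operatorname{hash}(p/q) \;=\; \pm\bigl(|p|\cdot q^{-1} \bmod P\bigr), \qquad q^{-1} \equiv q^{\,P-2} \pmod P,$$

with the numerator's sign applied afterwards, a result of -1 remapped to -2, and a denominator divisible by P (the only way mpz_invert can fail here) hashing to plus or minus _PyHASH_INF. Because P is prime, the mpz_powm call computes the inverse by Fermat's little theorem, and the final mpn_mod_1 performs the reduction modulo P directly on the limbs of the product.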
int
mpn_divisible_p (mp_srcptr ap, mp_size_t asize,
                 mp_srcptr dp, mp_size_t dsize)
{
  mp_limb_t  alow, dlow, dmask;
  mp_ptr     qp, rp;
  mp_size_t  i;
  TMP_DECL;

  ASSERT (asize >= 0);
  ASSERT (asize == 0 || ap[asize-1] != 0);
  ASSERT (dsize >= 1);
  ASSERT (dp[dsize-1] != 0);
  ASSERT_MPN (ap, asize);
  ASSERT_MPN (dp, dsize);

  /* When a<d only a==0 is divisible.
     Notice this test covers all cases of asize==0. */
  if (asize < dsize)
    return (asize == 0);

  /* Strip low zero limbs from d, requiring a==0 on those. */
  for (;;)
    {
      alow = *ap;
      dlow = *dp;

      if (dlow != 0)
        break;

      if (alow != 0)
        return 0;  /* a has fewer low zero limbs than d, so not divisible */

      /* a!=0 and d!=0 so won't get to size==0 */
      asize--; ASSERT (asize >= 1);
      dsize--; ASSERT (dsize >= 1);
      ap++;
      dp++;
    }

  /* a must have at least as many low zero bits as d */
  dmask = LOW_ZEROS_MASK (dlow);
  if ((alow & dmask) != 0)
    return 0;

  if (dsize == 1)
    {
      if (BELOW_THRESHOLD (asize, MODEXACT_1_ODD_THRESHOLD))
        return mpn_mod_1 (ap, asize, dlow) == 0;

      if ((dlow & 1) == 0)
        {
          unsigned  twos;
          count_trailing_zeros (twos, dlow);
          dlow >>= twos;
        }
      return mpn_modexact_1_odd (ap, asize, dlow) == 0;
    }
unsigned long fmpz_mod_ui(const fmpz_t input, const unsigned long x)
{
   unsigned long size = FLINT_ABS(input[0]);
   unsigned long mod;

   mod = mpn_mod_1(input + 1, size, x);

   if (!mod) return mod;
   else if ((long) input[0] < 0L)
   {
      return x - mod;
   }
   else return mod;
}
int
mpz_divisible_ui_p (mpz_srcptr a, unsigned long d)
{
  mp_size_t  asize;
  mp_ptr     ap;
  unsigned   twos;

  asize = SIZ(a);
  if (UNLIKELY (d == 0))
    return (asize == 0);

  if (asize == 0)  /* 0 divisible by any d */
    return 1;

  /* For nails don't try to be clever if d is bigger than a limb, just fake
     up an mpz_t and go to the main mpz_divisible_p.  */
  if (d > GMP_NUMB_MAX)
    {
      mp_limb_t  dlimbs[2];
      mpz_t      dz;
      ALLOC(dz) = 2;
      PTR(dz) = dlimbs;
      mpz_set_ui (dz, d);
      return mpz_divisible_p (a, dz);
    }

  ap = PTR(a);
  asize = ABS(asize);  /* ignore sign of a */

  if (ABOVE_THRESHOLD (asize, BMOD_1_TO_MOD_1_THRESHOLD))
    return mpn_mod_1 (ap, asize, (mp_limb_t) d) == 0;

  if (! (d & 1))
    {
      /* Strip low zero bits to get odd d required by modexact.  If
         d==e*2^n and a is divisible by 2^n and by e, then it's divisible
         by d. */
      if ((ap[0] & LOW_ZEROS_MASK (d)) != 0)
        return 0;

      count_trailing_zeros (twos, (mp_limb_t) d);
      d >>= twos;
    }

  return mpn_modexact_1_odd (ap, asize, (mp_limb_t) d) == 0;
}
/* Return {xp, xn} mod p.
   Assume 2p < B where B = 2^GMP_NUMB_BITS.
   We first compute {xp, xn} / B^n mod p using Montgomery reduction, where
   the number N to factor has n limbs.
   Then we multiply by B^(n+1) mod p (precomputed) and divide by B mod p.
   Assume invm = -1/p mod B and Bpow = B^n mod p. */
static mp_limb_t
ecm_mod_1 (mp_ptr xp, mp_size_t xn, mp_limb_t p, mp_size_t n,
           mp_limb_t invm, mp_limb_t Bpow)
{
  mp_limb_t q, cy, hi, lo, x0, x1;

  if (xn == 0)
    return 0;

  /* the code below assumes xn <= n+1, thus we call mpn_mod_1 otherwise,
     but this should never (or rarely) happen */
  if (xn > n + 1)
    return mpn_mod_1 (xp, xn, p);

  x0 = xp[0];
  cy = (mp_limb_t) 0;
  while (n-- > 0)
    {
      /* Invariant: cy is the input carry on xp[1], x0 is xp[0] */
      x1 = (xn > 1) ? xp[1] : 0;
      q = x0 * invm;            /* q = -x0/p mod B */
      umul_ppmm (hi, lo, q, p); /* hi*B + lo = -x0 mod B */
      /* Add hi*B + lo to x1*B + x0. Since p <= B-2 we have
         hi*B + lo <= (B-1)(B-2) = B^2-3B+2, thus hi <= B-3 */
      hi += cy + (lo != 0);     /* cannot overflow */
      x0 = x1 + hi;
      cy = x0 < hi;
      xn --;
      xp ++;
    }
  if (cy != 0)
    x0 -= p;
  /* now x0 = {xp, xn} / B^n mod p */
  umul_ppmm (x1, x0, x0, Bpow); /* since Bpow < p, x1 <= p-1 */
  q = x0 * invm;
  umul_ppmm (hi, lo, q, p);     /* hi <= p-1 thus hi+x1+1 < 2p-1 < B */
  hi = hi + x1 + (lo != 0);
  while (hi >= p)
    hi -= p;
  return hi;
}
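The two constants this routine assumes precomputed, invm = -1/p mod B and the power of B modulo p, are produced by its caller. Below is a minimal sketch of how they could be obtained, assuming 64-bit limbs (B = 2^64) and odd p; neg_inverse_mod_B is a hypothetical helper name, not something taken from GMP-ECM:

#include <stdint.h>

/* -1/p mod 2^64 for odd p, by Newton iteration: inv = p is already correct
   to 3 low bits (p*p == 1 mod 8), and each step inv *= 2 - p*inv doubles
   the number of correct low bits, so 5 steps reach 64 bits. */
static uint64_t
neg_inverse_mod_B (uint64_t p)
{
  uint64_t inv = p;                  /* correct to 3 bits */
  for (int i = 0; i < 5; i++)        /* 3 -> 6 -> 12 -> 24 -> 48 -> 96 bits */
    inv *= 2 - p * inv;
  return -inv;                       /* inv == 1/p mod 2^64, so negate it */
}

/* A power B^k mod p can be obtained with mpn_mod_1 itself, e.g. by
   reducing the (k+1)-limb number {0, ..., 0, 1} (which equals B^k) mod p. */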
unsigned long compute_factor_base(QS_t * qs_inf)
{
   unsigned long fb_prime = 2;
   unsigned long multiplier = qs_inf->k;
   prime_t * factor_base = qs_inf->factor_base;
   uint32_t * sqrts = qs_inf->sqrts;
   unsigned long num_primes = num_FB_primes(qs_inf->bits);
   unsigned long prime, nmod;
   double pinv;
   fmpz_t n = qs_inf->n;
   long kron;

   factor_base[0].p = multiplier;
   factor_base[0].pinv = z_precompute_inverse(multiplier);
   factor_base[1].p = 2;
   prime = 2;

   while (fb_prime < num_primes)
   {
      prime = z_nextprime(prime, 0);
      pinv = z_precompute_inverse(prime);
      nmod = mpn_mod_1(n + 1, n[0], prime);
      if (nmod == 0)
      {
         if (z_mod_precomp(multiplier, prime, pinv) != 0) return prime;
      }
      kron = z_jacobi(nmod, prime);
      if (kron == 1)
      {
         factor_base[fb_prime].p = prime;
         factor_base[fb_prime].pinv = pinv;
         sqrts[fb_prime] = z_sqrtmod(nmod, prime);
         fb_prime++;
      }
   }

   printf("Largest prime = %ld\n", prime);

   qs_inf->num_primes = fb_prime;

   return 0;
}
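The test above keeps an odd prime p in the factor base only when the Jacobi symbol of n modulo p is 1 (n here is presumably already multiplied by the Knuth-Schroeppel multiplier k, computed by knuth_schroeppel below), since only then does the congruence needed for sieving have a solution:

$$\left(\tfrac{n}{p}\right) = 1 \;\Longleftrightarrow\; \exists\, x:\ x^2 \equiv n \pmod p,\ n \not\equiv 0 \pmod p,$$

and z_sqrtmod stores one such root for later use when the sieve offsets are computed. If p divides n but not the multiplier, p is a nontrivial factor of the original number and is returned immediately.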
/* convert mpzvi to CRT representation, fast version, assumes
   mpzspm->T has been precomputed (see mpzspm.c) */
static void
mpzspv_from_mpzv_fast (mpzspv_t x, const spv_size_t offset, mpz_t mpzvi,
                       mpzspm_t mpzspm)
{
  const unsigned int sp_num = mpzspm->sp_num;
  unsigned int i, j, k, i0 = I0_THRESHOLD, I0;
  mpzv_t *T = mpzspm->T;
  unsigned int d = mpzspm->d, ni;

  ASSERT (d > i0);

  /* T[0] serves as vector of temporary mpz_t's, since it contains the small
     primes, which are also in mpzspm->spm[j]->sp */
  /* initially we split mpzvi in two */
  ni = 1 << (d - 1);
  mpz_mod (T[0][0], mpzvi, T[d-1][0]);
  mpz_mod (T[0][ni], mpzvi, T[d-1][1]);
  for (i = d-1; i-- > i0;)
    { /* goes down from depth i+1 to i */
      ni = 1 << i;
      for (j = k = 0; j + ni < sp_num; j += 2*ni, k += 2)
        {
          mpz_mod (T[0][j+ni], T[0][j], T[i][k+1]);
          mpz_mod (T[0][j], T[0][j], T[i][k]);
        }
      /* for the last entry T[0][j] if j < sp_num, there is nothing to do */
    }
  /* last steps */
  I0 = 1 << i0;
  for (j = 0; j < sp_num; j += I0)
    for (k = j; k < j + I0 && k < sp_num; k++)
      x[k][offset] = mpn_mod_1 (PTR(T[0][j]), SIZ(T[0][j]),
                                (mp_limb_t) mpzspm->spm[k]->sp);
  /* The typecast to mp_limb_t assumes that mp_limb_t is at least as wide
     as sp_t */
}
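The loop structure is that of a remainder tree. Assuming each T[i][k] holds the product of a contiguous block of 2^i of the small primes (the precomputation the comment refers to), reducing mpzvi first modulo the two half-products, then modulo quarter-products, and so on, yields the same residues as reducing it by every small prime directly, because m dividing M implies

$$x \bmod m \;=\; (x \bmod M) \bmod m .$$

By the time the mpn_mod_1 calls run, each operand only covers a block of 2^{i0} primes rather than the full-size mpzvi.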
/* B&S: ecrt mod m mod p_j.
 *
 * memory: MPZSPV_NORMALISE_STRIDE mpzspv coeffs
 *         6 * MPZSPV_NORMALISE_STRIDE sp's
 *         MPZSPV_NORMALISE_STRIDE floats */
void
mpzspv_normalise (mpzspv_t x, spv_size_t offset, spv_size_t len,
                  mpzspm_t mpzspm)
{
  unsigned int i, j, sp_num = mpzspm->sp_num;
  spv_size_t k, l;
  sp_t v;
  spv_t s, d, w;
  spm_t *spm = mpzspm->spm;

  float prime_recip;
  float *f;
  mpzspv_t t;

  ASSERT (mpzspv_verify (x, offset, len, mpzspm));

  f = (float *) malloc (MPZSPV_NORMALISE_STRIDE * sizeof (float));
  s = (spv_t) malloc (3 * MPZSPV_NORMALISE_STRIDE * sizeof (sp_t));
  d = (spv_t) malloc (3 * MPZSPV_NORMALISE_STRIDE * sizeof (sp_t));
  if (f == NULL || s == NULL || d == NULL)
    {
      fprintf (stderr, "Cannot allocate memory in mpzspv_normalise\n");
      exit (1);
    }
  t = mpzspv_init (MPZSPV_NORMALISE_STRIDE, mpzspm);

  memset (s, 0, 3 * MPZSPV_NORMALISE_STRIDE * sizeof (sp_t));

  for (l = 0; l < len; l += MPZSPV_NORMALISE_STRIDE)
    {
      spv_size_t stride = MIN (MPZSPV_NORMALISE_STRIDE, len - l);

      /* FIXME: use B&S Theorem 2.2 */
      for (k = 0; k < stride; k++)
        f[k] = 0.5;

      for (i = 0; i < sp_num; i++)
        {
          prime_recip = 1.0f / (float) spm[i]->sp;

          for (k = 0; k < stride; k++)
            {
              x[i][l + k + offset] = sp_mul (x[i][l + k + offset],
                  mpzspm->crt3[i], spm[i]->sp, spm[i]->mul_c);
              f[k] += (float) x[i][l + k + offset] * prime_recip;
            }
        }

      for (i = 0; i < sp_num; i++)
        {
          for (k = 0; k < stride; k++)
            {
              umul_ppmm (d[3 * k + 1], d[3 * k], mpzspm->crt5[i], (sp_t) f[k]);
              d[3 * k + 2] = 0;
            }

          for (j = 0; j < sp_num; j++)
            {
              w = x[j] + offset;
              v = mpzspm->crt4[i][j];

              for (k = 0; k < stride; k++)
                umul_ppmm (s[3 * k + 1], s[3 * k], w[k + l], v);

              /* this mpn_add_n accounts for about a third of the function's
               * runtime */
              mpn_add_n (d, d, s, 3 * stride);
            }

          for (k = 0; k < stride; k++)
            t[i][k] = mpn_mod_1 (d + 3 * k, 3, spm[i]->sp);
        }

      mpzspv_set (x, l + offset, t, 0, stride, mpzspm);
    }

  mpzspv_clear (t, mpzspm);

  free (s);
  free (d);
  free (f);
}
void compute_B_terms(QS_t * qs_inf, poly_t * poly_inf)
{
   unsigned long s = poly_inf->s;
   unsigned long * A_ind = poly_inf->A_ind;
   unsigned long * A_modp = poly_inf->A_modp;
   unsigned long * B_terms = poly_inf->B_terms;
   prime_t * factor_base = qs_inf->factor_base;
   unsigned long limbs = qs_inf->prec+1;
   unsigned long limbs2;
   unsigned long * A = poly_inf->A;
   unsigned long * B = poly_inf->B;
   unsigned long p, i;
   unsigned long * temp1 = (unsigned long *) flint_stack_alloc(limbs);
   unsigned long temp;
   mp_limb_t msl;
   double pinv;

   for (i = 0; i < s; i++)
   {
      p = factor_base[A_ind[i]].p;
      pinv = z_precompute_inverse(p);
      mpn_divmod_1(temp1 + 1, A + 1, A[0], p);
      temp1[0] = A[0] - (temp1[A[0]] == 0);
      A_modp[i] = (temp = mpn_mod_1(temp1 + 1, temp1[0], p));
      temp = z_invert(temp, p);
      temp = z_mulmod_precomp(temp, qs_inf->sqrts[A_ind[i]], p, pinv);
      if (temp > p/2) temp = p - temp;
      msl = mpn_mul_1(B_terms + i*limbs + 1, temp1 + 1, temp1[0], temp);
      if (msl)
      {
         B_terms[i*limbs + temp1[0] + 1] = msl;
         B_terms[i*limbs] = temp1[0] + 1;
      }
      else B_terms[i*limbs] = temp1[0];
#if B_TERMS
      mpz_t temp;
      mpz_init(temp);
      fmpz_to_mpz(temp, B_terms + i*limbs);
      gmp_printf("B_%ld = %Zd\n", i, temp);
      mpz_clear(temp);
#endif
   }

   F_mpn_copy(B, B_terms, B_terms[0]+1); // Set B to the sum of the B terms
   if (limbs > B_terms[0] + 1)
      F_mpn_clear(B + B_terms[0] + 1, limbs - B_terms[0] - 1);

   for (i = 1; i < s; i++)
   {
      limbs2 = B_terms[i*limbs];
      msl = mpn_add_n(B+1, B+1, B_terms + i*limbs + 1, limbs2);
      if (msl) mpn_add_1(B + limbs2 + 1, B + limbs2 + 1, limbs - limbs2 - 1, msl);
   }
   B[0] = limbs - 1;
   while (!B[B[0]] && B[0]) B[0]--;

#if B_TERMS
   mpz_t temp2;
   mpz_init(temp2);
   fmpz_to_mpz(temp2, B);
   gmp_printf("B = %Zd\n", temp2);
   mpz_clear(temp2);
#endif

   flint_stack_release(); // release temp1
}
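A sketch of what the loop computes (the standard SIQS construction, read off the code rather than quoted from FLINT): with A = p_1 ... p_s the product of the chosen factor-base primes, each term is B_i = (A/p_i) * t_i where t_i is congruent to (A/p_i)^{-1} * sqrts[A_ind[i]] modulo p_i (reduced to the representative at most p_i/2, which at worst flips the sign of the square root). Consequently

$$B_i \equiv \pm\sqrt{n} \pmod{p_i}, \qquad B_i \equiv 0 \pmod{p_j} \quad (j \neq i),$$

so the sum B of all the B_i satisfies B^2 congruent to n modulo A, which is the property the self-initialising sieve needs so that (Ax + B)^2 - n is divisible by A.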
void
check_functions (void)
{
  mp_limb_t  wp[2], wp2[2], xp[2], yp[2], r;
  int  i;

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 123;  yp[0] = 456;
      mpn_add_n (wp, xp, yp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 579);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 123;  wp[0] = 456;
      r = mpn_addmul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(2));
      ASSERT_ALWAYS (wp[0] == 702);
      ASSERT_ALWAYS (r == 0);
    }

#if HAVE_NATIVE_mpn_copyd
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 123;  xp[1] = 456;
      mpn_copyd (xp+1, xp, (mp_size_t) 1);
      ASSERT_ALWAYS (xp[1] == 123);
    }
#endif

#if HAVE_NATIVE_mpn_copyi
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 123;  xp[1] = 456;
      mpn_copyi (xp, xp+1, (mp_size_t) 1);
      ASSERT_ALWAYS (xp[0] == 456);
    }
#endif

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 1605;
      mpn_divexact_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(5));
      ASSERT_ALWAYS (wp[0] == 321);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 1296;
      r = mpn_divexact_by3c (wp, xp, (mp_size_t) 1, CNST_LIMB(0));
      ASSERT_ALWAYS (wp[0] == 432);
      ASSERT_ALWAYS (r == 0);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 578;
      r = mpn_divexact_byfobm1 (wp, xp, (mp_size_t) 1, CNST_LIMB(17),
                                CNST_LIMB(-1)/CNST_LIMB(17));
      ASSERT_ALWAYS (wp[0] == 34);
      ASSERT_ALWAYS (r == 0);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 287;
      r = mpn_divrem_1 (wp, (mp_size_t) 1, xp, (mp_size_t) 1, CNST_LIMB(7));
      ASSERT_ALWAYS (wp[1] == 41);
      ASSERT_ALWAYS (wp[0] == 0);
      ASSERT_ALWAYS (r == 0);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 290;
      r = mpn_divrem_euclidean_qr_1 (wp, 0, xp, (mp_size_t) 1, CNST_LIMB(7));
      ASSERT_ALWAYS (wp[0] == 41);
      ASSERT_ALWAYS (r == 3);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 12;
      r = mpn_gcd_1 (xp, (mp_size_t) 1, CNST_LIMB(9));
      ASSERT_ALWAYS (r == 3);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 0x1001;
      mpn_lshift (wp, xp, (mp_size_t) 1, 1);
      ASSERT_ALWAYS (wp[0] == 0x2002);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 14;
      r = mpn_mod_1 (xp, (mp_size_t) 1, CNST_LIMB(4));
      ASSERT_ALWAYS (r == 2);
    }

#if (GMP_NUMB_BITS % 4) == 0
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      int  bits = (GMP_NUMB_BITS / 4) * 3;
      mp_limb_t  mod = (CNST_LIMB(1) << bits) - 1;
      mp_limb_t  want = GMP_NUMB_MAX % mod;
      xp[0] = GMP_NUMB_MAX;
      r = mpn_mod_34lsub1 (xp, (mp_size_t) 1);
      ASSERT_ALWAYS (r % mod == want);
    }
#endif

  // DECL_modexact_1c_odd ((*modexact_1c_odd));

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 14;
      r = mpn_mul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(4));
      ASSERT_ALWAYS (wp[0] == 56);
      ASSERT_ALWAYS (r == 0);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 5;  yp[0] = 7;
      mpn_mul_basecase (wp, xp, (mp_size_t) 1, yp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 35);
      ASSERT_ALWAYS (wp[1] == 0);
    }

#if HAVE_NATIVE_mpn_preinv_divrem_1 && GMP_NAIL_BITS == 0
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 0x101;
      r = mpn_preinv_divrem_1 (wp, (mp_size_t) 1, xp, (mp_size_t) 1,
                               GMP_LIMB_HIGHBIT,
                               refmpn_invert_limb (GMP_LIMB_HIGHBIT), 0);
      ASSERT_ALWAYS (wp[0] == 0x202);
      ASSERT_ALWAYS (wp[1] == 0);
      ASSERT_ALWAYS (r == 0);
    }
#endif

#if GMP_NAIL_BITS == 0
  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = GMP_LIMB_HIGHBIT+123;
      r = mpn_preinv_mod_1 (xp, (mp_size_t) 1, GMP_LIMB_HIGHBIT,
                            refmpn_invert_limb (GMP_LIMB_HIGHBIT));
      ASSERT_ALWAYS (r == 123);
    }
#endif

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 5;
      modlimb_invert (r, xp[0]);
      r = -r;
      yp[0] = 43;  yp[1] = 75;
      mpn_redc_1 (wp, yp, xp, (mp_size_t) 1, r);
      ASSERT_ALWAYS (wp[0] == 78);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 5;  yp[0] = 3;
      mpn_sumdiff_n (wp, wp2, xp, yp, 1);
      ASSERT_ALWAYS (wp[0] == 8);
      ASSERT_ALWAYS (wp2[0] == 2);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 0x8008;
      mpn_rshift (wp, xp, (mp_size_t) 1, 1);
      ASSERT_ALWAYS (wp[0] == 0x4004);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 5;
      mpn_sqr_basecase (wp, xp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 25);
      ASSERT_ALWAYS (wp[1] == 0);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 999;  yp[0] = 666;
      mpn_sub_n (wp, xp, yp, (mp_size_t) 1);
      ASSERT_ALWAYS (wp[0] == 333);
    }

  memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
  for (i = 0; i < 2; i++)
    {
      xp[0] = 123;  wp[0] = 456;
      r = mpn_submul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(2));
      ASSERT_ALWAYS (wp[0] == 210);
      ASSERT_ALWAYS (r == 0);
    }
}
/* assume y > x > 0. return y mod x */
static ulong
resiu(GEN y, ulong x)
{
  return mpn_mod_1(LIMBS(y), NLIMBS(y), x);
}
int
mpz_probab_prime_p (mpz_srcptr n, int reps)
{
  mp_limb_t r;

  /* Handle small and negative n.  */
  if (mpz_cmp_ui (n, 1000000L) <= 0)
    {
      int is_prime;
      if (mpz_sgn (n) < 0)
        {
          /* Negative number.  Negate and call ourselves.  */
          mpz_t n2;
          mpz_init (n2);
          mpz_neg (n2, n);
          is_prime = mpz_probab_prime_p (n2, reps);
          mpz_clear (n2);
          return is_prime;
        }
      is_prime = isprime (mpz_get_ui (n));
      return is_prime ? 2 : 0;
    }

  /* If n is now even, it is not a prime.  */
  if ((mpz_get_ui (n) & 1) == 0)
    return 0;

#if defined (PP)
  /* Check if n has small factors.  */
#if defined (PP_INVERTED)
  r = MPN_MOD_OR_PREINV_MOD_1 (PTR(n), SIZ(n), (mp_limb_t) PP,
                               (mp_limb_t) PP_INVERTED);
#else
  r = mpn_mod_1 (PTR(n), SIZ(n), (mp_limb_t) PP);
#endif
  if (r % 3 == 0
#if BITS_PER_MP_LIMB >= 4
      || r % 5 == 0
#endif
#if BITS_PER_MP_LIMB >= 8
      || r % 7 == 0
#endif
#if BITS_PER_MP_LIMB >= 16
      || r % 11 == 0 || r % 13 == 0
#endif
#if BITS_PER_MP_LIMB >= 32
      || r % 17 == 0 || r % 19 == 0 || r % 23 == 0 || r % 29 == 0
#endif
#if BITS_PER_MP_LIMB >= 64
      || r % 31 == 0 || r % 37 == 0 || r % 41 == 0 || r % 43 == 0
      || r % 47 == 0 || r % 53 == 0
#endif
      )
    {
      return 0;
    }
#endif /* PP */

  /* Do more dividing.  We collect small primes, using umul_ppmm, until we
     overflow a single limb.  We divide our number by the small primes
     product, and look for factors in the remainder.  */
  {
    unsigned long int ln2;
    unsigned long int q;
    mp_limb_t p1, p0, p;
    unsigned int primes[15];
    int nprimes;

    nprimes = 0;
    p = 1;
    ln2 = mpz_sizeinbase (n, 2) / 30;
    ln2 = ln2 * ln2;
    for (q = PP_FIRST_OMITTED; q < ln2; q += 2)
      {
        if (isprime (q))
          {
            umul_ppmm (p1, p0, p, q);
            if (p1 != 0)
              {
                r = mpn_mod_1 (PTR(n), SIZ(n), p);
                while (--nprimes >= 0)
                  if (r % primes[nprimes] == 0)
                    {
                      ASSERT_ALWAYS (mpn_mod_1 (PTR(n), SIZ(n),
                                     (mp_limb_t) primes[nprimes]) == 0);
                      return 0;
                    }
                p = q;
                nprimes = 0;
              }
            else
              {
                p = p0;
              }
            primes[nprimes++] = q;
          }
      }
  }

  /* Perform a number of Miller-Rabin tests.  */
  return mpz_millerrabin (n, reps);
}
int
mpz_probab_prime_p (mpz_srcptr n, int reps)
{
  mp_limb_t r;
  mpz_t n2;

  /* Handle small and negative n.  */
  if (mpz_cmp_ui (n, 1000000L) <= 0)
    {
      if (mpz_cmpabs_ui (n, 1000000L) <= 0)
        {
          int is_prime;
          unsigned long n0;
          n0 = mpz_get_ui (n);
          is_prime = n0 & (n0 > 1) ? isprime (n0) : n0 == 2;
          return is_prime ? 2 : 0;
        }
      /* Negative number.  Negate and fall out.  */
      PTR(n2) = PTR(n);
      SIZ(n2) = -SIZ(n);
      n = n2;
    }

  /* If n is now even, it is not a prime.  */
  if (mpz_even_p (n))
    return 0;

#if defined (PP)
  /* Check if n has small factors.  */
#if defined (PP_INVERTED)
  r = MPN_MOD_OR_PREINV_MOD_1 (PTR(n), (mp_size_t) SIZ(n), (mp_limb_t) PP,
                               (mp_limb_t) PP_INVERTED);
#else
  r = mpn_mod_1 (PTR(n), (mp_size_t) SIZ(n), (mp_limb_t) PP);
#endif
  if (r % 3 == 0
#if GMP_LIMB_BITS >= 4
      || r % 5 == 0
#endif
#if GMP_LIMB_BITS >= 8
      || r % 7 == 0
#endif
#if GMP_LIMB_BITS >= 16
      || r % 11 == 0 || r % 13 == 0
#endif
#if GMP_LIMB_BITS >= 32
      || r % 17 == 0 || r % 19 == 0 || r % 23 == 0 || r % 29 == 0
#endif
#if GMP_LIMB_BITS >= 64
      || r % 31 == 0 || r % 37 == 0 || r % 41 == 0 || r % 43 == 0
      || r % 47 == 0 || r % 53 == 0
#endif
      )
    {
      return 0;
    }
#endif /* PP */

  /* Do more dividing.  We collect small primes, using umul_ppmm, until we
     overflow a single limb.  We divide our number by the small primes
     product, and look for factors in the remainder.  */
  {
    unsigned long int ln2;
    unsigned long int q;
    mp_limb_t p1, p0, p;
    unsigned int primes[15];
    int nprimes;

    nprimes = 0;
    p = 1;
    ln2 = mpz_sizeinbase (n, 2);	/* FIXME: tune this limit */
    for (q = PP_FIRST_OMITTED; q < ln2; q += 2)
      {
        if (isprime (q))
          {
            umul_ppmm (p1, p0, p, q);
            if (p1 != 0)
              {
                r = MPN_MOD_OR_MODEXACT_1_ODD (PTR(n), (mp_size_t) SIZ(n), p);
                while (--nprimes >= 0)
                  if (r % primes[nprimes] == 0)
                    {
                      ASSERT_ALWAYS (mpn_mod_1 (PTR(n), (mp_size_t) SIZ(n),
                                     (mp_limb_t) primes[nprimes]) == 0);
                      return 0;
                    }
                p = q;
                nprimes = 0;
              }
            else
              {
                p = p0;
              }
            primes[nprimes++] = q;
          }
      }
  }

  /* Perform a number of Miller-Rabin tests.  */
  return mpz_millerrabin (n, reps);
}
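Both versions of mpz_probab_prime_p rely on the same batching trick in the trial-division block: small primes q are multiplied together with umul_ppmm until the product would spill into a second limb, and a single mpn_mod_1 (or modexact) reduction by the one-limb product P then serves every prime in the batch, since q dividing P gives

$$n \bmod q \;=\; (n \bmod P) \bmod q .$$

One multi-limb reduction thus replaces one per prime; only the cheap single-limb remainders r % q remain per batched prime.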
int
mpz_congruent_ui_p (mpz_srcptr a, unsigned long cu, unsigned long du)
{
  mp_srcptr  ap;
  mp_size_t  asize;
  mp_limb_t  c, d, r;

  if (UNLIKELY (du == 0))
    return (mpz_cmp_ui (a, cu) == 0);

  asize = SIZ(a);
  if (asize == 0)
    {
      if (cu < du)
        return cu == 0;
      else
        return (cu % du) == 0;
    }

  /* For nails don't try to be clever if c or d is bigger than a limb, just
     fake up some mpz_t's and go to the main mpz_congruent_p.  */
  if (du > GMP_NUMB_MAX || cu > GMP_NUMB_MAX)
    {
      mp_limb_t  climbs[2], dlimbs[2];
      mpz_t  cz, dz;

      ALLOC(cz) = 2;
      PTR(cz) = climbs;
      ALLOC(dz) = 2;
      PTR(dz) = dlimbs;

      mpz_set_ui (cz, cu);
      mpz_set_ui (dz, du);
      return mpz_congruent_p (a, cz, dz);
    }

  /* NEG_MOD works on limbs, so convert ulong to limb */
  c = cu;
  d = du;

  if (asize < 0)
    {
      asize = -asize;
      NEG_MOD (c, c, d);
    }

  ap = PTR (a);

  if (ABOVE_THRESHOLD (asize, BMOD_1_TO_MOD_1_THRESHOLD))
    {
      r = mpn_mod_1 (ap, asize, d);
      if (c < d)
        return r == c;
      else
        return r == (c % d);
    }

  if ((d & 1) == 0)
    {
      /* Strip low zero bits to get odd d required by modexact.  If
         d==e*2^n then a==c mod d if and only if both a==c mod 2^n
         and a==c mod e.  */
      unsigned  twos;

      if ((ap[0]-c) & LOW_ZEROS_MASK (d))
        return 0;

      count_trailing_zeros (twos, d);
      d >>= twos;
    }
static void
check_one (mp_srcptr ap, mp_size_t n, mp_limb_t b)
{
  mp_limb_t  r_ref = refmpn_mod_1 (ap, n, b);
  mp_limb_t  r;

  if (n >= 2)
    {
      mp_limb_t pre[4];
      mpn_mod_1_1p_cps (pre, b);
      r = mpn_mod_1_1p (ap, n, b << pre[1], pre);
      if (r != r_ref)
        {
          printf ("mpn_mod_1_1p failed\n");
          goto fail;
        }
    }
  if ((b & GMP_NUMB_HIGHBIT) == 0)
    {
      mp_limb_t pre[5];
      mpn_mod_1s_2p_cps (pre, b);
      r = mpn_mod_1s_2p (ap, n, b << pre[1], pre);
      if (r != r_ref)
        {
          printf ("mpn_mod_1s_2p failed\n");
          goto fail;
        }
    }
  if (b <= GMP_NUMB_MASK / 3)
    {
      mp_limb_t pre[6];
      mpn_mod_1s_3p_cps (pre, b);
      r = mpn_mod_1s_3p (ap, n, b << pre[1], pre);
      if (r != r_ref)
        {
          printf ("mpn_mod_1s_3p failed\n");
          goto fail;
        }
    }
  if (b <= GMP_NUMB_MASK / 4)
    {
      mp_limb_t pre[7];
      mpn_mod_1s_4p_cps (pre, b);
      r = mpn_mod_1s_4p (ap, n, b << pre[1], pre);
      if (r != r_ref)
        {
          printf ("mpn_mod_1s_4p failed\n");
          goto fail;
        }
    }
  r = mpn_mod_1 (ap, n, b);
  if (r != r_ref)
    {
      printf ("mpn_mod_1 failed\n");
    fail:
      printf ("an = %d, a: ", (int) n);
      mpn_dump (ap, n);
      printf ("b           : ");
      mpn_dump (&b, 1);
      printf ("r (expected): ");
      mpn_dump (&r_ref, 1);
      printf ("r (bad)     : ");
      mpn_dump (&r, 1);
      abort();
    }
}
int
mpz_congruent_p (mpz_srcptr a, mpz_srcptr c, mpz_srcptr d)
{
  mp_size_t  asize, csize, dsize, sign;
  mp_srcptr  ap, cp, dp;
  mp_ptr     xp;
  mp_limb_t  alow, clow, dlow, dmask, r;
  int        result;
  TMP_DECL;

  dsize = SIZ(d);
  if (UNLIKELY (dsize == 0))
    return (mpz_cmp (a, c) == 0);

  dsize = ABS(dsize);
  dp = PTR(d);

  if (ABSIZ(a) < ABSIZ(c))
    MPZ_SRCPTR_SWAP (a, c);

  asize = SIZ(a);
  csize = SIZ(c);
  sign = (asize ^ csize);

  asize = ABS(asize);
  ap = PTR(a);

  if (csize == 0)
    return mpn_divisible_p (ap, asize, dp, dsize);

  csize = ABS(csize);
  cp = PTR(c);

  alow = ap[0];
  clow = cp[0];
  dlow = dp[0];

  /* Check a==c mod low zero bits of dlow.  This might catch a few cases of
     a!=c quickly, and it helps the csize==1 special cases below.  */
  dmask = LOW_ZEROS_MASK (dlow) & GMP_NUMB_MASK;
  alow = (sign >= 0 ? alow : -alow);
  if (((alow-clow) & dmask) != 0)
    return 0;

  if (csize == 1)
    {
      if (dsize == 1)
        {
        cong_1:
          if (sign < 0)
            NEG_MOD (clow, clow, dlow);

          if (ABOVE_THRESHOLD (asize, BMOD_1_TO_MOD_1_THRESHOLD))
            {
              r = mpn_mod_1 (ap, asize, dlow);
              if (clow < dlow)
                return r == clow;
              else
                return r == (clow % dlow);
            }

          if ((dlow & 1) == 0)
            {
              /* Strip low zero bits to get odd d required by modexact.  If
                 d==e*2^n then a==c mod d if and only if both a==c mod e and
                 a==c mod 2^n, the latter having been done above.  */
              unsigned  twos;
              count_trailing_zeros (twos, dlow);
              dlow >>= twos;
            }

          r = mpn_modexact_1c_odd (ap, asize, dlow, clow);
          return r == 0 || r == dlow;
        }
unsigned long knuth_schroeppel(QS_t * qs_inf)
{
   float best_factor = -10.0f;
   unsigned long multiplier = 1;
   unsigned long nmod8, mod8, multindex, prime, nmod, mult;
   const unsigned long max_fb_primes = qs_inf->num_primes;
   unsigned long fb_prime = 2; // leave space for the multiplier and 2
   float factors[NUMMULTS];
   float logpdivp;
   double pinv;
   int kron;

   uint32_t * sqrts = qs_inf->sqrts;
   fmpz_t n = qs_inf->n;

   nmod8 = n[1]%8;

   mpz_t r;

   for (multindex = 0; multindex < NUMMULTS; multindex++)
   {
      mod8 = ((nmod8*multipliers[multindex])%8);
      factors[multindex] = 0.34657359; // ln2/2
      if (mod8 == 1) factors[multindex] *= 4.0;
      if (mod8 == 5) factors[multindex] *= 2.0;
      factors[multindex] -= (log((float) multipliers[multindex]) / 2.0);
   }

   prime = 3;
   while (prime < KSMAX) // && (fb_prime < max_fb_primes))
   {
      pinv = z_precompute_inverse(prime);
      logpdivp = log((float) prime) / (float) prime; // log p / p
      nmod = mpn_mod_1(n + 1, n[0], prime);
      if (nmod == 0) return prime;
      kron = z_jacobi(nmod, prime);
      for (multindex = 0; multindex < NUMMULTS; multindex++)
      {
         mult = multipliers[multindex];
         if (mult >= prime)
         {
            if (mult >= prime*prime) mult = mult%prime;
            else mult = z_mod_precomp(mult, prime, pinv);
         }
         if (mult == 0) factors[multindex] += logpdivp;
         else if (kron*z_jacobi(mult, prime) == 1)
            factors[multindex] += 2.0*logpdivp;
      }
      prime = z_nextprime(prime, 0);
   }

   for (multindex = 0; multindex < NUMMULTS; multindex++)
   {
      if (factors[multindex] > best_factor)
      {
         best_factor = factors[multindex];
         multiplier = multipliers[multindex];
      }
   }

   qs_inf->k = multiplier;
   return 0;
}
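Written out, the score the loops accumulate for each candidate multiplier k is (a reading of the code, which follows the classical Knuth-Schroeppel weighting):

$$f(k, n) \;=\; g_2(kn)\,\frac{\ln 2}{2} \;-\; \frac{\ln k}{2} \;+\; \sum_{3 \le p < \mathrm{KSMAX}} g_p(kn)\,\frac{\ln p}{p},$$

where g_2 = 4 if kn is congruent to 1 mod 8, 2 if congruent to 5 mod 8, and 1 otherwise, and for odd p: g_p = 2 if kn is a nonzero quadratic residue mod p, 1 if p divides k, and 0 otherwise. The multiplier with the largest score is stored in qs_inf->k; if some prime p < KSMAX divides n itself, it is returned at once as a factor.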
void
check (void)
{
  mp_limb_t  wp[100], xp[100], yp[100];
  mp_size_t  size = 100;

  refmpn_zero (xp, size);
  refmpn_zero (yp, size);
  refmpn_zero (wp, size);

  pre ("mpn_add_n");  mpn_add_n (wp, xp, yp, size);  post ();
#if HAVE_NATIVE_mpn_add_nc
  pre ("mpn_add_nc");  mpn_add_nc (wp, xp, yp, size, CNST_LIMB(0));  post ();
#endif
#if HAVE_NATIVE_mpn_addlsh1_n
  pre ("mpn_addlsh1_n");  mpn_addlsh1_n (wp, xp, yp, size);  post ();
#endif
#if HAVE_NATIVE_mpn_and_n
  pre ("mpn_and_n");  mpn_and_n (wp, xp, yp, size);  post ();
#endif
#if HAVE_NATIVE_mpn_andn_n
  pre ("mpn_andn_n");  mpn_andn_n (wp, xp, yp, size);  post ();
#endif
  pre ("mpn_addmul_1");  mpn_addmul_1 (wp, xp, size, yp[0]);  post ();
#if HAVE_NATIVE_mpn_addmul_1c
  pre ("mpn_addmul_1c");  mpn_addmul_1c (wp, xp, size, yp[0], CNST_LIMB(0));  post ();
#endif
#if HAVE_NATIVE_mpn_com_n
  pre ("mpn_com_n");  mpn_com_n (wp, xp, size);  post ();
#endif
#if HAVE_NATIVE_mpn_copyd
  pre ("mpn_copyd");  mpn_copyd (wp, xp, size);  post ();
#endif
#if HAVE_NATIVE_mpn_copyi
  pre ("mpn_copyi");  mpn_copyi (wp, xp, size);  post ();
#endif
  pre ("mpn_divexact_1");  mpn_divexact_1 (wp, xp, size, CNST_LIMB(123));  post ();
  pre ("mpn_divexact_by3c");  mpn_divexact_by3c (wp, xp, size, CNST_LIMB(0));  post ();
  pre ("mpn_divrem_1");  mpn_divrem_1 (wp, (mp_size_t) 0, xp, size, CNST_LIMB(123));  post ();
#if HAVE_NATIVE_mpn_divrem_1c
  pre ("mpn_divrem_1c");  mpn_divrem_1c (wp, (mp_size_t) 0, xp, size, CNST_LIMB(123), CNST_LIMB(122));  post ();
#endif
  pre ("mpn_gcd_1");  xp[0] |= 1;  notdead += (unsigned long) mpn_gcd_1 (xp, size, CNST_LIMB(123));  post ();
#if HAVE_NATIVE_mpn_gcd_finda
  pre ("mpn_gcd_finda");  xp[0] |= 1;  xp[1] |= 1;  notdead += mpn_gcd_finda (xp);  post ();
#endif
  pre ("mpn_hamdist");  notdead += mpn_hamdist (xp, yp, size);  post ();
#if HAVE_NATIVE_mpn_ior_n
  pre ("mpn_ior_n");  mpn_ior_n (wp, xp, yp, size);  post ();
#endif
#if HAVE_NATIVE_mpn_iorn_n
  pre ("mpn_iorn_n");  mpn_iorn_n (wp, xp, yp, size);  post ();
#endif
  pre ("mpn_lshift");  mpn_lshift (wp, xp, size, 1);  post ();
  pre ("mpn_mod_1");  notdead += mpn_mod_1 (xp, size, CNST_LIMB(123));  post ();
#if HAVE_NATIVE_mpn_mod_1c
  pre ("mpn_mod_1c");  notdead += mpn_mod_1c (xp, size, CNST_LIMB(123), CNST_LIMB(122));  post ();
#endif
#if GMP_NUMB_BITS % 4 == 0
  pre ("mpn_mod_34lsub1");  notdead += mpn_mod_34lsub1 (xp, size);  post ();
#endif
  pre ("mpn_modexact_1_odd");  notdead += mpn_modexact_1_odd (xp, size, CNST_LIMB(123));  post ();
  pre ("mpn_modexact_1c_odd");  notdead += mpn_modexact_1c_odd (xp, size, CNST_LIMB(123), CNST_LIMB(456));  post ();
  pre ("mpn_mul_1");  mpn_mul_1 (wp, xp, size, yp[0]);  post ();
#if HAVE_NATIVE_mpn_mul_1c
  pre ("mpn_mul_1c");  mpn_mul_1c (wp, xp, size, yp[0], CNST_LIMB(0));  post ();
#endif
#if HAVE_NATIVE_mpn_mul_2
  pre ("mpn_mul_2");  mpn_mul_2 (wp, xp, size-1, yp);  post ();
#endif
  pre ("mpn_mul_basecase");  mpn_mul_basecase (wp, xp, (mp_size_t) 3, yp, (mp_size_t) 3);  post ();
#if HAVE_NATIVE_mpn_nand_n
  pre ("mpn_nand_n");  mpn_nand_n (wp, xp, yp, size);  post ();
#endif
#if HAVE_NATIVE_mpn_nior_n
  pre ("mpn_nior_n");  mpn_nior_n (wp, xp, yp, size);  post ();
#endif
  pre ("mpn_popcount");  notdead += mpn_popcount (xp, size);  post ();
  pre ("mpn_preinv_mod_1");
  notdead += mpn_preinv_mod_1 (xp, size, GMP_NUMB_MAX,
                               refmpn_invert_limb (GMP_NUMB_MAX));
  post ();
#if USE_PREINV_DIVREM_1 || HAVE_NATIVE_mpn_preinv_divrem_1
  pre ("mpn_preinv_divrem_1");
  mpn_preinv_divrem_1 (wp, (mp_size_t) 0, xp, size, GMP_NUMB_MAX,
                       refmpn_invert_limb (GMP_NUMB_MAX), 0);
  post ();
#endif
#if HAVE_NATIVE_mpn_rsh1add_n
  pre ("mpn_rsh1add_n");  mpn_rsh1add_n (wp, xp, yp, size);  post ();
#endif
#if HAVE_NATIVE_mpn_rsh1sub_n
  pre ("mpn_rsh1sub_n");  mpn_rsh1sub_n (wp, xp, yp, size);  post ();
#endif
  pre ("mpn_rshift");  mpn_rshift (wp, xp, size, 1);  post ();
  pre ("mpn_sqr_basecase");  mpn_sqr_basecase (wp, xp, (mp_size_t) 3);  post ();
  pre ("mpn_submul_1");  mpn_submul_1 (wp, xp, size, yp[0]);  post ();
#if HAVE_NATIVE_mpn_submul_1c
  pre ("mpn_submul_1c");  mpn_submul_1c (wp, xp, size, yp[0], CNST_LIMB(0));  post ();
#endif
  pre ("mpn_sub_n");  mpn_sub_n (wp, xp, yp, size);  post ();
#if HAVE_NATIVE_mpn_sub_nc
  pre ("mpn_sub_nc");  mpn_sub_nc (wp, xp, yp, size, CNST_LIMB(0));  post ();
#endif
#if HAVE_NATIVE_mpn_sublsh1_n
  pre ("mpn_sublsh1_n");  mpn_sublsh1_n (wp, xp, yp, size);  post ();
#endif
#if HAVE_NATIVE_mpn_udiv_qrnnd
  pre ("mpn_udiv_qrnnd");  mpn_udiv_qrnnd (&wp[0], CNST_LIMB(122), xp[0], CNST_LIMB(123));  post ();
#endif
#if HAVE_NATIVE_mpn_udiv_qrnnd_r
  pre ("mpn_udiv_qrnnd_r");  mpn_udiv_qrnnd (CNST_LIMB(122), xp[0], CNST_LIMB(123), &wp[0]);  post ();
#endif
#if HAVE_NATIVE_mpn_umul_ppmm
  pre ("mpn_umul_ppmm");  mpn_umul_ppmm (&wp[0], xp[0], yp[0]);  post ();
#endif
#if HAVE_NATIVE_mpn_umul_ppmm_r
  pre ("mpn_umul_ppmm_r");  mpn_umul_ppmm_r (&wp[0], xp[0], yp[0]);  post ();
#endif
#if HAVE_NATIVE_mpn_xor_n
  pre ("mpn_xor_n");  mpn_xor_n (wp, xp, yp, size);  post ();
#endif
#if HAVE_NATIVE_mpn_xnor_n
  pre ("mpn_xnor_n");  mpn_xnor_n (wp, xp, yp, size);  post ();
#endif
}