/* Reference modular inversion built on mpn_gcdext.

   Copies {ap, mn} and {mp, mn} into local scratch space, runs mpn_gcdext
   on the copies and, when the gcd is the single limb 1, stores mn limbs
   of the cofactor at rp.

   Returns 1 on success, 0 when the gcd is not 1. */
static int
ref_modinv (mp_limb_t *rp, const mp_limb_t *ap, const mp_limb_t *mp, mp_size_t mn)
{
  /* Four consecutive areas of mn+1 limbs: both operands, gcd, cofactor. */
  mp_limb_t tp[4*(mn+1)];
  mp_limb_t *a_copy = tp;
  mp_limb_t *m_copy = tp + mn+1;
  mp_limb_t *gcd = tp + 2*(mn+1);
  mp_limb_t *cofactor = tp + 3*(mn+1);
  mp_size_t gcd_size;
  mp_size_t cofactor_size;

  mpn_copyi (a_copy, ap, mn);
  mpn_copyi (m_copy, mp, mn);

  gcd_size = mpn_gcdext (gcd, cofactor, &cofactor_size,
                         a_copy, mn, m_copy, mn);

  if (!(gcd_size == 1 && gcd[0] == 1))
    return 0;

  if (cofactor_size < 0)
    {
      /* Negative size: store m minus the cofactor magnitude instead. */
      mpn_sub (cofactor, mp, mn, cofactor, -cofactor_size);
    }
  else if (cofactor_size < mn)
    {
      /* Zero-pad up to mn limbs. */
      mpn_zero (cofactor + cofactor_size, mn - cofactor_size);
    }

  mpn_copyi (rp, cofactor, mn);
  return 1;
}
/*
    Dispatcher for the truncated squaring of (poly, len) to n terms,
    writing to res.  The routine is selected from n, len and the value
    returned by _fmpz_vec_max_limbs(poly, len):

      - n < 7                      : classical
      - n < 16 and limbs > 12      : Karatsuba on a copy padded to n entries
      - small or very unbalanced   : Kronecker substitution (KS)
      - otherwise                  : mullow_SS with both operands equal

    In the Karatsuba branch only min(len, n) entries of poly are copied into
    the n-entry scratch vector.  Copying all len entries would write past
    the end of the buffer whenever len > n, and entries at index >= n are
    not part of the n-entry input handed to the Karatsuba routine anyway.
*/
void
_fmpz_poly_sqrlow(fmpz * res, const fmpz * poly, long len, long n)
{
    mp_size_t limbs;

    if (n < 7)
    {
        _fmpz_poly_sqrlow_classical(res, poly, len, n);
        return;
    }

    limbs = _fmpz_vec_max_limbs(poly, len);

    if (n < 16 && limbs > 12)
    {
        /* Number of entries of poly that fit into the n-entry copy. */
        const long m = (len < n) ? len : n;
        long i;
        fmpz *copy;

        copy = flint_malloc(n * sizeof(fmpz));

        /* Shallow copy of the fmpz words; the copy owns nothing. */
        for (i = 0; i < m; i++)
            copy[i] = poly[i];

        /* Zero the tail word-wise (treats each fmpz as one limb). */
        mpn_zero((mp_ptr) copy + m, n - m);

        _fmpz_poly_sqrlow_karatsuba_n(res, copy, n);

        flint_free(copy);
    }
    else if (limbs <= 4)
        _fmpz_poly_sqrlow_KS(res, poly, len, n);
    else if (limbs/2048 > len)
        _fmpz_poly_sqrlow_KS(res, poly, len, n);
    else if (limbs*FLINT_BITS*4 < len)
        _fmpz_poly_sqrlow_KS(res, poly, len, n);
    else
        _fmpz_poly_mullow_SS(res, poly, len, poly, len, n);
}
/*
    Stores the integer x as coefficient n of poly.

    Nothing is done when x is zero and no nonzero coefficient would be
    overwritten.  When n lies beyond the current length, the polynomial is
    extended and the new entries are zeroed first.  With a denominator
    different from 1 the stored numerator is x multiplied by the denominator.
    Normalisation / canonicalisation is only invoked when an existing nonzero
    coefficient was replaced.
*/
void
fmpq_poly_set_coeff_mpz(fmpq_poly_t poly, long n, const mpz_t x)
{
    const long old_len = poly->length;
    const int overwrite = (n < old_len && !fmpz_is_zero(poly->coeffs + n));
    fmpz * slot;

    if (!overwrite && mpz_sgn(x) == 0)
        return;

    if (n + 1 > old_len)
    {
        fmpq_poly_fit_length(poly, n + 1);
        _fmpq_poly_set_length(poly, n + 1);
        mpn_zero((mp_ptr) poly->coeffs + old_len, (n + 1) - old_len);
    }

    /* Taken only after fit_length, which may have moved the coefficients. */
    slot = poly->coeffs + n;

    if (*poly->den == 1L)
    {
        fmpz_set_mpz(slot, x);
        if (overwrite)
            _fmpq_poly_normalise(poly);
        return;
    }

    fmpz_set_mpz(slot, x);
    fmpz_mul(slot, slot, poly->den);
    if (overwrite)
        fmpq_poly_canonicalise(poly);
}
/* Modular inversion via mpn_gcdext, with the modulus taken from ecc->p.

   tp is caller-provided scratch, used here as four consecutive areas of
   size+1 limbs (operand copy, modulus copy, gcd, cofactor).  On success
   size limbs of the cofactor are written to rp.

   Returns 1 on success, 0 when the gcd is not the single limb 1. */
static int
modinv_gcd (const struct ecc_curve *ecc, mp_limb_t *rp, mp_limb_t *ap, mp_limb_t *tp)
{
  const mp_size_t size = ecc->p.size;
  mp_limb_t *a_copy = tp;
  mp_limb_t *m_copy = tp + size+1;
  mp_limb_t *gcd = tp + 2*(size+1);
  mp_limb_t *cofactor = tp + 3*(size+1);
  mp_size_t gcd_size;
  mp_size_t cofactor_size;

  mpn_copyi (a_copy, ap, size);
  mpn_copyi (m_copy, ecc->p.m, size);

  gcd_size = mpn_gcdext (gcd, cofactor, &cofactor_size,
                         a_copy, size, m_copy, size);

  if (!(gcd_size == 1 && gcd[0] == 1))
    return 0;

  if (cofactor_size < 0)
    {
      /* Negative size: store the modulus minus the cofactor magnitude. */
      mpn_sub (cofactor, ecc->p.m, size, cofactor, -cofactor_size);
    }
  else if (cofactor_size < size)
    {
      /* Zero-pad up to size limbs. */
      mpn_zero (cofactor + cofactor_size, size - cofactor_size);
    }

  mpn_copyi (rp, cofactor, size);
  return 1;
}
/*
    Changes the allocation of poly to alloc coefficients.

    alloc == 0 clears and re-initialises the polynomial.  If nothing was
    allocated before, a zeroed block is obtained with flint_calloc.
    Otherwise the polynomial is first truncated to alloc, the block is
    reallocated, and any newly gained entries are zeroed.
*/
void
fmpz_poly_realloc(fmpz_poly_t poly, long alloc)
{
    if (alloc == 0)
    {
        /* Clear up, reinitialise */
        fmpz_poly_clear(poly);
        fmpz_poly_init(poly);
        return;
    }

    if (!poly->alloc)
    {
        /* Nothing allocated already so do it now */
        poly->coeffs = (fmpz *) flint_calloc(alloc, sizeof(fmpz));
    }
    else
    {
        long grown;

        fmpz_poly_truncate(poly, alloc);
        poly->coeffs = (fmpz *) flint_realloc(poly->coeffs,
                                              alloc * sizeof(fmpz));

        /* Entries past the previous allocation are uninitialised. */
        grown = alloc - poly->alloc;
        if (grown > 0)
            mpn_zero((mp_ptr) (poly->coeffs + poly->alloc), grown);
    }

    poly->alloc = alloc;
}
/*
    Wrapper around _nmod_poly_cosh_series: sets g from h to n terms.

    Aborts if h has a nonzero constant term.  When h has length 1 or n < 2
    the result is zero for n <= 0 and the constant 1 otherwise.  A short
    input is copied into a zero-padded temporary of length n; when g aliases
    h and h has at least n coefficients the output goes to a temporary
    polynomial that is swapped into g afterwards.
*/
void
nmod_poly_cosh_series(nmod_poly_t g, const nmod_poly_t h, long n)
{
    const long h_len = h->length;
    mp_ptr dst, src;
    nmod_poly_t tmp;
    int padded, via_tmp;

    if (h_len > 0 && h->coeffs[0] != 0UL)
    {
        printf("Exception: nmod_poly_cosh_series: constant term != 0\n");
        abort();
    }

    if (h_len == 1 || n < 2)
    {
        nmod_poly_zero(g);
        if (n > 0)
            nmod_poly_set_coeff_ui(g, 0, 1UL);
        return;
    }

    padded = (h_len < n);
    via_tmp = (h == g && h_len >= n);

    if (padded)
    {
        src = _nmod_vec_init(n);
        mpn_copyi(src, h->coeffs, h_len);
        mpn_zero(src + h_len, n - h_len);
    }
    else
    {
        src = h->coeffs;
    }

    if (via_tmp)
    {
        nmod_poly_init2(tmp, h->mod.n, n);
        dst = tmp->coeffs;
    }
    else
    {
        nmod_poly_fit_length(g, n);
        dst = g->coeffs;
    }

    _nmod_poly_cosh_series(dst, src, n, h->mod);

    if (via_tmp)
    {
        nmod_poly_swap(g, tmp);
        nmod_poly_clear(tmp);
    }

    g->length = n;

    if (padded)
        _nmod_vec_free(src);

    _nmod_poly_normalise(g);
}
/* Copy the limbs of x to xp and zero-fill up to n limbs.
   Asserts that x has at most n limbs. */
void
mpz_limbs_copy (mp_limb_t *xp, mpz_srcptr x, mp_size_t n)
{
  const mp_size_t used = mpz_size (x);
  mp_size_t padding;

  assert (used <= n);

  mpn_copyi (xp, mpz_limbs_read (x), used);

  padding = n - used;
  if (padding > 0)
    mpn_zero (xp + used, padding);
}
/*
    Wrapper around _nmod_poly_revert_series_lagrange: sets Qinv from Q to
    n terms.

    Aborts unless Q has length at least 2, a zero constant term and a
    nonzero coefficient of x^1.  A short Q is copied into a zero-padded
    temporary of length n; when Qinv aliases Q and Q has at least n
    coefficients the output goes to a temporary polynomial that is swapped
    into Qinv afterwards.
*/
void
nmod_poly_revert_series_lagrange(nmod_poly_t Qinv, const nmod_poly_t Q, long n)
{
    const long Qlen = Q->length;
    mp_ptr dst, src;
    nmod_poly_t tmp;
    int padded, via_tmp;

    if (Qlen < 2 || Q->coeffs[0] != 0 || Q->coeffs[1] == 0)
    {
        printf("exception: nmod_poly_revert_series_lagrange: input must have "
            "zero constant and an invertible coefficient of x^1");
        abort();
    }

    padded = (Qlen < n);
    via_tmp = (Q == Qinv && Qlen >= n);

    if (padded)
    {
        src = _nmod_vec_init(n);
        mpn_copyi(src, Q->coeffs, Qlen);
        mpn_zero(src + Qlen, n - Qlen);
    }
    else
    {
        src = Q->coeffs;
    }

    if (via_tmp)
    {
        nmod_poly_init2(tmp, Q->mod.n, n);
        dst = tmp->coeffs;
    }
    else
    {
        nmod_poly_fit_length(Qinv, n);
        dst = Qinv->coeffs;
    }

    _nmod_poly_revert_series_lagrange(dst, src, n, Q->mod);

    if (via_tmp)
    {
        nmod_poly_swap(Qinv, tmp);
        nmod_poly_clear(tmp);
    }

    Qinv->length = n;

    if (padded)
        _nmod_vec_clear(src);

    _nmod_poly_normalise(Qinv);
}
/*
    Wrapper around _nmod_poly_inv_series_basecase: sets Qinv from Q to
    n terms.

    Aborts when n is 0, Q is empty or the constant term of Q is zero.
    A short Q is copied into a zero-padded temporary of length n; when Qinv
    aliases Q and Q has at least n coefficients the output goes to a
    temporary polynomial that is swapped into Qinv afterwards.
*/
void
nmod_poly_inv_series_basecase(nmod_poly_t Qinv, const nmod_poly_t Q, long n)
{
    const long Qlen = Q->length;
    mp_ptr dst, src;
    nmod_poly_t tmp;
    int padded, via_tmp;

    if (n == 0 || Q->length == 0 || Q->coeffs[0] == 0)
    {
        printf("Exception: division by zero in nmod_poly_inv_series_basecase\n");
        abort();
    }

    padded = (Qlen < n);
    via_tmp = (Q == Qinv && Qlen >= n);

    if (padded)
    {
        src = _nmod_vec_init(n);
        mpn_copyi(src, Q->coeffs, Qlen);
        mpn_zero(src + Qlen, n - Qlen);
    }
    else
    {
        src = Q->coeffs;
    }

    if (via_tmp)
    {
        nmod_poly_init2(tmp, Q->mod.n, n);
        dst = tmp->coeffs;
    }
    else
    {
        nmod_poly_fit_length(Qinv, n);
        dst = Qinv->coeffs;
    }

    _nmod_poly_inv_series_basecase(dst, src, n, Q->mod);

    if (via_tmp)
    {
        nmod_poly_swap(Qinv, tmp);
        nmod_poly_clear(tmp);
    }

    Qinv->length = n;

    if (padded)
        _nmod_vec_free(src);

    _nmod_poly_normalise(Qinv);
}
/*
    Sets coefficient n of poly to the signed value x.

    The polynomial is grown to hold n + 1 coefficients.  If n is at or past
    the current length, the entries between the old end and index n are
    zeroed and the length becomes n + 1.  The polynomial is normalised at
    the end since x may be zero.
*/
void
fmpz_poly_set_coeff_si(fmpz_poly_t poly, long n, long x)
{
    const long needed = n + 1;

    fmpz_poly_fit_length(poly, needed);

    if (poly->length < needed)
    {
        /* insert zeroes between end of poly and new coeff if needed */
        mpn_zero((mp_ptr) (poly->coeffs + poly->length), n - poly->length);
        poly->length = needed;
    }

    fmpz_set_si(poly->coeffs + n, x);

    /* we may have set leading coefficient to zero */
    _fmpz_poly_normalise(poly);
}
/*
    Zero-pads {ap, an} and {bp, bn} to rn + 1 limbs each, calls
    mpn_mulmod_Bexpp1 on the padded copies with rn, copies the low rn limbs
    of the result to rp and returns limb rn of the result.

    The parameter k is not used by this implementation.
*/
int
mpn_mul_fft(mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, int k)
{
    const mp_size_t padded = rn + 1;
    mp_ptr prod, scratch, a_pad, b_pad;
    mp_size_t top;
    TMP_DECL;

    TMP_MARK;

    prod    = (mp_ptr) TMP_ALLOC_LIMBS(padded);
    scratch = (mp_ptr) TMP_ALLOC_LIMBS(padded);
    a_pad   = (mp_ptr) TMP_ALLOC_LIMBS(padded);
    b_pad   = (mp_ptr) TMP_ALLOC_LIMBS(padded);

    mpn_copyi(a_pad, ap, an);
    mpn_zero(a_pad + an, padded - an);

    mpn_copyi(b_pad, bp, bn);
    mpn_zero(b_pad + bn, padded - bn);

    mpn_mulmod_Bexpp1(prod, a_pad, b_pad, rn, scratch);

    mpn_copyi(rp, prod, rn);

    /* Read the top limb before the temporaries are released. */
    top = prod[rn];

    TMP_FREE;

    return top;
}
/* Return a read-only pointer to an n-limb area holding the value of x.
   n must be at least the current size of x (asserted); limbs above the
   current size are zeroed. */
const mp_limb_t *
mpz_limbs_read_n (mpz_ptr x, mp_size_t n)
{
  const mp_size_t used = mpz_size (x);
  mp_ptr limbs;
  mp_size_t padding;

  assert (used <= n);

  limbs = mpz_limbs_modify (x, n);

  padding = n - used;
  if (padding > 0)
    mpn_zero (limbs + used, padding);

  return limbs;
}
/* Fill a table of 2^bits entries addressed through the TABLE macro.
   Entry 0 is all zero limbs, entry 1 is p converted by ecc_a_to_j.  Each
   further even entry j is ecc_dup_jj of entry j/2, and entry j+1 is
   ecc_add_jja of entry j and entry 1. */
static void
table_init (const struct ecc_curve *ecc, mp_limb_t *table, unsigned bits, int initial, const mp_limb_t *p, mp_limb_t *scratch)
{
  const unsigned entries = 1 << bits;
  unsigned even;

  mpn_zero (TABLE(0), 3*ecc->size);
  ecc_a_to_j (ecc, initial, TABLE(1), p);

  even = 2;
  while (even < entries)
    {
      ecc_dup_jj (ecc, TABLE(even), TABLE(even/2), scratch);
      ecc_add_jja (ecc, TABLE(even+1), TABLE(even), TABLE(1), scratch);
      even += 2;
    }
}
/*
    Packs the coefficients of poly into the integer f, using bit_size bits
    per coefficient, via _fmpz_poly_bit_pack.

    f is set to zero when poly is empty or bit_size is zero.  If the leading
    coefficient of poly is negative, packing is done with negate = -1 and f
    is negated at the end.
*/
void
fmpz_poly_bit_pack(fmpz_t f, const fmpz_poly_t poly, mp_bitcnt_t bit_size)
{
    const long len = fmpz_poly_length(poly);
    __mpz_struct * big;
    long used;
    int negate;

    if (len == 0 || bit_size == 0)
    {
        fmpz_zero(f);
        return;
    }

    big = _fmpz_promote(f);
    mpz_realloc2(big, len * bit_size);

    /* Clear every allocated limb before packing into it. */
    used = big->_mp_alloc;
    mpn_zero(big->_mp_d, used);

    negate = (fmpz_sgn(fmpz_poly_lead(poly)) < 0) ? -1 : 0;

    _fmpz_poly_bit_pack(big->_mp_d, poly->coeffs, len, bit_size, negate);

    /* Strip leading zero limbs to obtain the size field. */
    while (used > 0 && big->_mp_d[used - 1] == 0)
        used--;
    big->_mp_size = used;

    _fmpz_demote_val(f);

    if (negate)
        fmpz_neg(f, f);
}
/* Fill a table of 2^bits entries addressed through the TABLE macro.
   Entry 0 is zeroed and then gets the limb 1 at offsets ecc->p.size and
   2*ecc->p.size; entry 1 is p converted by ecc_a_to_j.  Each further even
   entry j is ecc_dup_eh of entry j/2, and entry j+1 is ecc_add_ehh of
   entry j and entry 1. */
static void
table_init (const struct ecc_curve *ecc, mp_limb_t *table, unsigned bits, const mp_limb_t *p, mp_limb_t *scratch)
{
  const unsigned entries = 1 << bits;
  unsigned even;

  mpn_zero (TABLE(0), 3*ecc->p.size);
  TABLE(0)[2*ecc->p.size] = 1;
  TABLE(0)[ecc->p.size] = 1;

  ecc_a_to_j (ecc, TABLE(1), p);

  even = 2;
  while (even < entries)
    {
      ecc_dup_eh (ecc, TABLE(even), TABLE(even/2), scratch);
      ecc_add_ehh (ecc, TABLE(even+1), TABLE(even), TABLE(1), scratch);
      even += 2;
    }
}
/* Binary double-and-add loop over the limbs of np, most significant bit
   first.  p is converted with ecc_a_to_j into scratch; r starts as all
   zero limbs.  Every bit performs one ecc_dup_jj and one ecc_add_jja, and
   the results are selected with cnd_copy rather than with branches. */
void
ecc_mul_a (const struct ecc_curve *ecc, int initial, mp_limb_t *r, const mp_limb_t *np, const mp_limb_t *p, mp_limb_t *scratch)
{
#define tp scratch
#define pj (scratch + 3*ecc->size)
#define scratch_out (scratch + 6*ecc->size)

  int is_zero = 1;
  unsigned i;

  ecc_a_to_j (ecc, initial, pj, p);
  mpn_zero (r, 3*ecc->size);

  i = ecc->size;
  while (i-- > 0)
    {
      const mp_limb_t limb = np[i];
      mp_limb_t mask = (mp_limb_t) 1 << (GMP_NUMB_BITS - 1);

      while (mask > 0)
        {
          int digit;

          ecc_dup_jj (ecc, r, r, scratch_out);
          ecc_add_jja (ecc, tp, r, pj, scratch_out);

          digit = (limb & mask) > 0;

          /* If is_zero is set, r is the zero point, and ecc_add_jja
             produced garbage. */
          cnd_copy (is_zero, tp, pj, 3*ecc->size);
          is_zero &= ~digit;

          /* If we had a one-bit, use the sum. */
          cnd_copy (digit, r, tp, 3*ecc->size);

          mask >>= 1;
        }
    }
}
/*
    Sets (Qinv, n) from (Q, n) by reversing Q, dividing x^(2n-2) by the
    reversed polynomial with _nmod_poly_div_divconquer, and reversing the
    length-n quotient.

    The dividend x^(2n-2) is stored in full (2n - 1 limbs: 2n - 2 zeros
    followed by a 1), with the reversed copy of Q directly behind it in the
    same allocation.  The dividend pointer therefore always stays inside the
    allocated block; forming a pointer n - 1 limbs before the start of a
    shorter buffer would be undefined behaviour.

    n is expected to be positive; the caller is responsible for that.
*/
void
_nmod_poly_inv_series_basecase(mp_ptr Qinv, mp_srcptr Q, long n, nmod_t mod)
{
    const long num_len = 2*n - 1;    /* length of x^(2n-2) */
    mp_ptr X2n, Qrev;

    X2n = _nmod_vec_init(num_len + n);
    Qrev = X2n + num_len;

    _nmod_poly_reverse(Qrev, Q, n, n);

    /* Dividend: zeros in positions 0 .. 2n-3, leading coefficient 1. */
    mpn_zero(X2n, num_len - 1);
    X2n[num_len - 1] = 1;

    _nmod_poly_div_divconquer(Qinv, X2n, num_len, Qrev, n, mod);

    _nmod_poly_reverse(Qinv, Qinv, n, n);

    _nmod_vec_free(X2n);
}
/* Binary double-and-add loop over the limbs of np, most significant bit
   first.  p is converted with ecc_a_to_j into scratch.  r starts as the
   point with x = 0, y = 1, z = 1.  Every bit performs one ecc_dup_eh and
   one ecc_add_ehh, and the sum is selected with cnd_copy when the bit is
   set. */
void
ecc_mul_a_eh (const struct ecc_curve *ecc, mp_limb_t *r, const mp_limb_t *np, const mp_limb_t *p, mp_limb_t *scratch)
{
#define pe scratch
#define tp (scratch + 3*ecc->p.size)
#define scratch_out (scratch + 6*ecc->p.size)

  unsigned i;

  ecc_a_to_j (ecc, pe, p);

  /* x = 0, y = 1, z = 1 */
  mpn_zero (r, 3*ecc->p.size);
  r[2*ecc->p.size] = 1;
  r[ecc->p.size] = 1;

  i = ecc->p.size;
  while (i-- > 0)
    {
      const mp_limb_t limb = np[i];
      mp_limb_t mask = (mp_limb_t) 1 << (GMP_NUMB_BITS - 1);

      while (mask > 0)
        {
          int digit;

          ecc_dup_eh (ecc, r, r, scratch_out);
          ecc_add_ehh (ecc, tp, r, pe, scratch_out);

          digit = (limb & mask) > 0;

          /* If we had a one-bit, use the sum. */
          cnd_copy (digit, r, tp, 3*ecc->p.size);

          mask >>= 1;
        }
    }
}
/*
    Wrapper around _nmod_poly_tanh_series: sets g from h to n terms.

    Aborts if h has a nonzero constant term.  The result is zero when h has
    length 1 or n < 2.  A short input is copied into a zero-padded temporary
    of length n; otherwise the coefficients of h are passed directly.
*/
void
nmod_poly_tanh_series(nmod_poly_t g, const nmod_poly_t h, long n)
{
    const long h_len = h->length;
    const int padded = (h_len < n);
    mp_ptr src;

    if (h_len > 0 && h->coeffs[0] != 0UL)
    {
        printf("Exception: nmod_poly_tanh_series: constant term != 0\n");
        abort();
    }

    if (h_len == 1 || n < 2)
    {
        nmod_poly_zero(g);
        return;
    }

    /* Grow g first; the coefficient pointer of h is read afterwards. */
    nmod_poly_fit_length(g, n);

    if (!padded)
    {
        src = h->coeffs;
    }
    else
    {
        src = _nmod_vec_init(n);
        mpn_copyi(src, h->coeffs, h_len);
        mpn_zero(src + h_len, n - h_len);
    }

    _nmod_poly_tanh_series(g->coeffs, src, n, h->mod);

    if (padded)
        _nmod_vec_free(src);

    g->length = n;
    _nmod_poly_normalise(g);
}
void test_main (void) { unsigned i; for (i = 0; ecc_curves[i]; i++) { const struct ecc_curve *ecc = ecc_curves[i]; mp_limb_t *g = xalloc_limbs (ecc_size_j (ecc)); mp_limb_t *g2 = xalloc_limbs (ecc_size_j (ecc)); mp_limb_t *g3 = xalloc_limbs (ecc_size_j (ecc)); mp_limb_t *p = xalloc_limbs (ecc_size_j (ecc)); mp_limb_t *scratch = xalloc_limbs (ECC_ADD_JJJ_ITCH(ecc->p.size)); if (ecc->p.bit_size == 255) { mp_limb_t *z = xalloc_limbs (ecc_size_j (ecc)); /* Zero point has x = 0, y = 1, z = 1 */ mpn_zero (z, 3*ecc->p.size); z[ecc->p.size] = z[2*ecc->p.size] = 1; ecc_a_to_j (ecc, g, ecc->g); ecc_add_ehh (ecc, p, z, z, scratch); test_ecc_mul_h (i, 0, p); ecc_add_eh (ecc, p, z, z, scratch); test_ecc_mul_h (i, 0, p); ecc_add_ehh (ecc, p, g, p, scratch); test_ecc_mul_h (i, 1, p); ecc_add_eh (ecc, p, z, g, scratch); test_ecc_mul_h (i, 1, p); ecc_add_ehh (ecc, g2, g, p, scratch); test_ecc_mul_h (i, 2, g2); ecc_add_eh (ecc, g2, g, g, scratch); test_ecc_mul_h (i, 2, g2); ecc_add_ehh (ecc, g3, g, g2, scratch); test_ecc_mul_h (i, 3, g3); ecc_add_eh (ecc, g3, g2, g, scratch); test_ecc_mul_h (i, 3, g3); ecc_add_ehh (ecc, p, g, g3, scratch); test_ecc_mul_h (i, 4, p); ecc_add_eh (ecc, p, g3, g, scratch); test_ecc_mul_h (i, 4, p); ecc_add_ehh (ecc, p, g2, g2, scratch); test_ecc_mul_h (i, 4, p); free (z); } else { ecc_a_to_j (ecc, g, ecc->g); ecc_dup_jj (ecc, g2, g, scratch); test_ecc_mul_h (i, 2, g2); ecc_add_jjj (ecc, g3, g, g2, scratch); test_ecc_mul_h (i, 3, g3); ecc_add_jjj (ecc, g3, g2, g, scratch); test_ecc_mul_h (i, 3, g3); ecc_add_jjj (ecc, p, g, g3, scratch); test_ecc_mul_h (i, 4, p); ecc_add_jjj (ecc, p, g3, g, scratch); test_ecc_mul_h (i, 4, p); ecc_dup_jj (ecc, p, g2, scratch); test_ecc_mul_h (i, 4, p); } free (g); free (g2); free (g3); free (p); free (scratch); } }
void test_main (void) { gmp_randstate_t rands; mpz_t r; unsigned i; gmp_randinit_default (rands); mpz_init (r); for (i = 0; ecc_curves[i]; i++) { const struct ecc_curve *ecc = ecc_curves[i]; mp_size_t size = ecc_size (ecc); mp_limb_t *p = xalloc_limbs (ecc_size_j (ecc)); mp_limb_t *q = xalloc_limbs (ecc_size_j (ecc)); mp_limb_t *n = xalloc_limbs (size); mp_limb_t *scratch = xalloc_limbs (ecc->mul_g_itch); mpn_zero (n, size); n[0] = 1; ecc->mul_g (ecc, p, n, scratch); ecc->h_to_a (ecc, 0, p, p, scratch); if (mpn_cmp (p, ecc->g, 2*size != 0)) { fprintf (stderr, "ecc->mul_g with n = 1 failed.\n"); abort (); } for (n[0] = 2; n[0] <= 4; n[0]++) { ecc->mul_g (ecc, p, n, scratch); test_ecc_mul_h (i, n[0], p); } /* (order - 1) * g = - g */ mpn_sub_1 (n, ecc->q.m, size, 1); ecc->mul_g (ecc, p, n, scratch); ecc->h_to_a (ecc, 0, p, p, scratch); if (ecc->p.bit_size == 255) /* For edwards curves, - (x,y ) == (-x, y). FIXME: Swap x and y, to get identical negation? */ mpn_sub_n (p, ecc->p.m, p, size); else mpn_sub_n (p + size, ecc->p.m, p + size, size); if (mpn_cmp (p, ecc->g, 2*size) != 0) { fprintf (stderr, "ecc->mul_g with n = order - 1 failed.\n"); abort (); } free (n); free (p); free (q); free (scratch); } mpz_clear (r); gmp_randclear (rands); }
/*
    Wrapper around _nmod_poly_compose_mod_horner with operands poly1, poly2
    and modulus poly3; the result has at most len3 - 1 coefficients.

    Aborts when poly3 is empty.  The result is zero when poly1 is empty or
    poly3 has length 1, and a copy of poly1 when poly1 has length 1.  When
    res aliases poly1 or poly3 the computation is redone into a temporary
    that is swapped into res.  poly2 is either zero-padded to len3 - 1
    coefficients or first reduced with _nmod_poly_rem.
*/
void
nmod_poly_compose_mod_horner(nmod_poly_t res, const nmod_poly_t poly1, const nmod_poly_t poly2, const nmod_poly_t poly3)
{
    const long len1 = poly1->length;
    const long len2 = poly2->length;
    const long len3 = poly3->length;
    const long rlen = len3 - 1;
    mp_ptr reduced;

    if (len3 == 0)
    {
        printf("exception: division by zero in nmod_poly_compose_mod_horner\n");
        abort();
    }

    if (len1 == 0 || len3 == 1)
    {
        nmod_poly_zero(res);
        return;
    }

    if (len1 == 1)
    {
        nmod_poly_set(res, poly1);
        return;
    }

    if (res == poly3 || res == poly1)
    {
        nmod_poly_t tmp;

        nmod_poly_init_preinv(tmp, res->mod.n, res->mod.ninv);
        nmod_poly_compose_mod_horner(tmp, poly1, poly2, poly3);
        nmod_poly_swap(tmp, res);
        nmod_poly_clear(tmp);
        return;
    }

    reduced = _nmod_vec_init(rlen);

    if (len2 > rlen)
    {
        _nmod_poly_rem(reduced, poly2->coeffs, len2,
                       poly3->coeffs, len3, res->mod);
    }
    else
    {
        mpn_copyi(reduced, poly2->coeffs, len2);
        mpn_zero(reduced + len2, rlen - len2);
    }

    nmod_poly_fit_length(res, rlen);
    _nmod_poly_compose_mod_horner(res->coeffs, poly1->coeffs, len1,
                                  reduced, poly3->coeffs, len3, res->mod);
    res->length = rlen;
    _nmod_poly_normalise(res);

    _nmod_vec_clear(reduced);
}
/*
    Wrapper around _nmod_poly_compose_mod_brent_kung with operands poly1,
    poly2 and modulus poly3; the result has at most len3 - 1 coefficients.

    Aborts when poly3 is empty or when poly1 is not shorter than poly3.
    The result is zero when poly1 is empty or poly3 has length 1, and a copy
    of poly1 when poly1 has length 1.  When res aliases poly1 or poly3 the
    computation is redone into a temporary that is swapped into res.  poly2
    is either zero-padded to len3 - 1 coefficients or first reduced with
    _nmod_poly_rem.
*/
void
nmod_poly_compose_mod_brent_kung(nmod_poly_t res, const nmod_poly_t poly1, const nmod_poly_t poly2, const nmod_poly_t poly3)
{
    const long len1 = poly1->length;
    const long len2 = poly2->length;
    const long len3 = poly3->length;
    const long rlen = len3 - 1;
    mp_ptr reduced;

    if (len3 == 0)
    {
        printf("exception: division by zero in "
               "nmod_poly_compose_mod_brent_kung\n");
        abort();
    }

    if (len1 >= len3)
    {
        printf("exception: nmod_poly_compose_brent_kung: the degree of the"
               " first polynomial must be smaller than that of the modulus\n");
        abort();
    }

    if (len1 == 0 || len3 == 1)
    {
        nmod_poly_zero(res);
        return;
    }

    if (len1 == 1)
    {
        nmod_poly_set(res, poly1);
        return;
    }

    if (res == poly3 || res == poly1)
    {
        nmod_poly_t tmp;

        nmod_poly_init_preinv(tmp, res->mod.n, res->mod.ninv);
        nmod_poly_compose_mod_brent_kung(tmp, poly1, poly2, poly3);
        nmod_poly_swap(tmp, res);
        nmod_poly_clear(tmp);
        return;
    }

    reduced = _nmod_vec_init(rlen);

    if (len2 > rlen)
    {
        _nmod_poly_rem(reduced, poly2->coeffs, len2,
                       poly3->coeffs, len3, res->mod);
    }
    else
    {
        mpn_copyi(reduced, poly2->coeffs, len2);
        mpn_zero(reduced + len2, rlen - len2);
    }

    nmod_poly_fit_length(res, rlen);
    _nmod_poly_compose_mod_brent_kung(res->coeffs, poly1->coeffs, len1,
                                      reduced, poly3->coeffs, len3, res->mod);
    res->length = rlen;
    _nmod_poly_normalise(res);

    _nmod_vec_clear(reduced);
}
/*
   Multiplies i1 by i2 into r1 using a negacyclic FFT convolution with
   2n = 2^(depth+1) coefficients; r1 receives r_limbs + 1 limbs.
   NOTE(review): the name and the final mpn_normmod_2expp1 call suggest the
   product is reduced modulo 2^(r_limbs*GMP_LIMB_BITS) + 1 -- confirm
   against the callers.

   r1      output, r_limbs + 1 limbs are written
   i1, i2  inputs; when the two pointers are equal the transform of the
           first operand is reused for the second
   r_limbs limb count handed to the split / combine / normalise helpers
   depth   log2 of n
   w       n*w is passed as the bit size of the pointwise multiplications
*/
void mpir_fft_mulmod_2expp1(mp_ptr r1, mp_srcptr i1, mp_srcptr i2, mp_size_t r_limbs, mp_bitcnt_t depth, mp_bitcnt_t w)
{
   mp_size_t n = (((mp_size_t)1)<<depth);
   /* bits of input per coefficient, as passed to the split/combine helpers */
   mp_bitcnt_t bits1 = (r_limbs*GMP_LIMB_BITS)/(2*n);
   mp_size_t limb_add, limbs = (n*w)/GMP_LIMB_BITS;
   /* each coefficient buffer holds limbs + 1 limbs */
   mp_size_t size = limbs + 1;
   mp_size_t i, j, ll;
   mp_limb_t * ptr;
   mp_limb_t ** ii, ** jj, *tt, *t1, *t2, *s1, *r, *ii0, *jj0;
   mp_limb_t c;
   TMP_DECL;

   TMP_MARK;

   /*
      One allocation, laid out as: 2n coefficient pointers, 2n coefficient
      buffers of size limbs each, ii0 (2n limbs), t1, t2, s1 (size limbs
      each), r (2n limbs) and tt (the remaining 2*size limbs).
      NOTE(review): pointers and limbs share this block and are counted
      alike, which presumes sizeof(mp_limb_t *) == sizeof(mp_limb_t).
   */
   ii = TMP_BALLOC_MP_PTRS(2*(n + n*size) + 4*n + 5*size);
   for (i = 0, ptr = (mp_ptr) ii + 2*n; i < 2*n; i++, ptr += size)
   {
      ii[i] = ptr;
   }
   ii0 = ptr;
   t1 = ii0 + 2*n;
   t2 = t1 + size;
   s1 = t2 + size;
   r = s1 + size;
   tt = r + 2*n;

   if (i1 != i2)
   {
      /* distinct operands: second set of pointers, buffers and jj0 */
      jj = TMP_BALLOC_MP_PTRS(2*(n + n*size) + 2*n);
      for (i = 0, ptr = (mp_ptr) jj + 2*n; i < 2*n; i++, ptr += size)
      {
         jj[i] = ptr;
      }
      jj0 = ptr;
   } else
   {
      /* same operand twice: share everything */
      jj = ii;
      jj0 = ii0;
   }

   /* split i1 into coefficients and zero the buffers that were not filled */
   j = mpir_fft_split_bits(ii, i1, r_limbs, bits1, limbs);
   for ( ; j < 2*n; j++)
      mpn_zero(ii[j], limbs + 1);

   /* keep the lowest limb of every coefficient for the one-limb convolution
      performed further down */
   for (i = 0; i < 2*n; i++)
      ii0[i] = ii[i][0];

   mpir_fft_negacyclic(ii, n, w, &t1, &t2, &s1);
   for (j = 0; j < 2*n; j++)
      mpn_normmod_2expp1(ii[j], limbs);

   if (i1 != i2)
   {
      /* same preparation for the second operand */
      j = mpir_fft_split_bits(jj, i2, r_limbs, bits1, limbs);
      for ( ; j < 2*n; j++)
         mpn_zero(jj[j], limbs + 1);

      for (i = 0; i < 2*n; i++)
         jj0[i] = jj[i][0];

      mpir_fft_negacyclic(jj, n, w, &t1, &t2, &s1);
   }

   /* pointwise products; c combines the top limbs of both factors and the
      returned value becomes the new top limb */
   for (j = 0; j < 2*n; j++)
   {
      if (i1 != i2) mpn_normmod_2expp1(jj[j], limbs);
      c = 2*ii[j][limbs] + jj[j][limbs];
      ii[j][limbs] = mpn_mulmod_2expp1_basecase(ii[j], ii[j], jj[j], c, n*w, tt);
   }

   mpir_ifft_negacyclic(ii, n, w, &t1, &t2, &s1);

   /* convolution of the saved low limbs, result in r */
   mpir_fft_naive_convolution_1(r, ii0, jj0, 2*n);

   /* scale each coefficient by 2^-(depth + 1), then correct it with the
      difference between r[j] and its own low limb; carries end up in the
      top limb and in r[j] */
   for (j = 0; j < 2*n; j++)
   {
      mp_limb_t t, cy2;

      mpn_div_2expmod_2expp1(ii[j], ii[j], limbs, depth + 1);
      mpn_normmod_2expp1(ii[j], limbs);

      t = ii[j][limbs];
      ii[j][limbs] = r[j] - ii[j][0];
      cy2 = mpn_add_1(ii[j], ii[j], limbs + 1, ii[j][limbs]);
      add_ssaaaa(r[j], ii[j][limbs], 0, ii[j][limbs], 0, t);
      if (cy2) r[j]++;
   }

   /* recombine all but the last coefficient into r1 */
   mpn_zero(r1, r_limbs + 1);
   mpir_fft_combine_bits(r1, ii, 2*n - 1, bits1, limbs + 1, r_limbs + 1);

   /*
      as the negacyclic convolution has effectively done subtractions
      some of the coefficients will be negative, so need to subtract p
   */
   ll = 0;
   limb_add = bits1/GMP_LIMB_BITS;

   for (j = 0; j < 2*n - 2; j++)
   {
      if (r[j])
         mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);
      else if ((mp_limb_signed_t) ii[j][limbs] < 0) /* coefficient was -ve */
      {
         mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);
         mpn_sub_1(r1 + ll + limbs + 1, r1 + ll + limbs + 1, r_limbs - limbs - ll, 1);
      }

      ll += limb_add;
   }

   /* penultimate coefficient, top bit was already ignored */
   if (r[j] || (mp_limb_signed_t) ii[j][limbs] < 0) /* coefficient was -ve */
      mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);

   /* final coefficient wraps around */
   if (limb_add)
      r1[r_limbs] += mpn_add_n(r1 + r_limbs - limb_add, r1 + r_limbs - limb_add, ii[2*n - 1], limb_add);
   /* the part of the last coefficient above limb_add limbs is subtracted
      from the bottom of r1; the borrow is propagated by the addmod call */
   c = mpn_sub_n(r1, r1, ii[2*n - 1] + limb_add, limbs + 1 - limb_add);
   mpn_addmod_2expp1_1(r1 + limbs + 1 - limb_add, r_limbs - limbs - 1 + limb_add, -c);
   mpn_normmod_2expp1(r1, r_limbs);

   TMP_FREE;
}
/*
   Heuristic polynomial gcd by bit packing.

   Both inputs are divided by their content, packed into large integers
   with pack_bits bits per coefficient, and the integer gcd of the packed
   values is unpacked as a candidate G.  The candidate is accepted only if
   the packed quotients of both inputs by G are exact and, when the bit
   bound alone cannot guarantee it, multiplying quotient by G reproduces
   the input.

   Assumes len1 != 0 != len2.
   NOTE(review): the special-case comments below ("if len1 == 1 then so
   does len2") indicate that len1 >= len2 is expected -- confirm with the
   callers.

   Returns 1 when the gcd was found; res then holds len2 coefficients (the
   gcd multiplied by the gcd of the contents, zero padded), except in the
   len2 == 1 and len2 == 2 special cases which write 1 resp. 2 entries.
   Returns 0 when the heuristic failed; res is then not written.

   Each input is packed with its own leading-coefficient sign (sign1 for
   poly1, sign2 for poly2), so the multiply-out check for the second
   polynomial uses sign2.  If the integer division of the second packed
   polynomial is not exact, the candidate is rejected.
*/
int
_fmpz_poly_gcd_heuristic(fmpz * res, const fmpz * poly1, long len1,
                                        const fmpz * poly2, long len2)
{
   ulong bits1, bits2, max_bits, pack_bits, bound_bits, bits_G, bits_Q;
   ulong limbs1, limbs2, limbsg, pack_limbs, qlimbs;
   ulong log_glen, log_length;
   long sign1, sign2, glen, qlen;
   fmpz_t ac, bc, d, gc;
   fmpz * A, * B, * G, * Q, * t;
   mp_ptr array1, array2, arrayg, q, temp;
   int divides;

   fmpz_init(ac);
   fmpz_init(bc);
   fmpz_init(d);

   /* compute gcd of content of poly1 and poly2 */
   _fmpz_poly_content(ac, poly1, len1);
   _fmpz_poly_content(bc, poly2, len2);
   fmpz_gcd(d, ac, bc);

   /* special case, one of the polys is a constant */
   if (len2 == 1) /* if len1 == 1 then so does len2 */
   {
      fmpz_set(res, d);

      fmpz_clear(ac);
      fmpz_clear(bc);
      fmpz_clear(d);

      return 1;
   }

   /* divide poly1 and poly2 by their content */
   A = _fmpz_vec_init(len1);
   B = _fmpz_vec_init(len2);
   _fmpz_vec_scalar_divexact_fmpz(A, poly1, len1, ac);
   _fmpz_vec_scalar_divexact_fmpz(B, poly2, len2, bc);
   fmpz_clear(ac);
   fmpz_clear(bc);

   /* special case, one of the polys is length 2 */
   if (len2 == 2) /* if len1 == 2 then so does len2 */
   {
      Q = _fmpz_vec_init(len1 - len2 + 1);
      if (_fmpz_poly_divides(Q, A, len1, B, 2))
      {
         _fmpz_vec_scalar_mul_fmpz(res, B, 2, d);
         if (fmpz_sgn(res + 1) < 0)
            _fmpz_vec_neg(res, res, 2);
      }
      else
      {
         fmpz_set(res, d);
         fmpz_zero(res + 1);
      }

      fmpz_clear(d);
      _fmpz_vec_clear(A, len1);
      _fmpz_vec_clear(B, len2);
      _fmpz_vec_clear(Q, len1 - len2 + 1);

      return 1;
   }

   /*
      Determine how many bits (pack_bits) to pack into. The bound
      bound_bits ensures that if G | A and G | B with G primitive
      then G is the gcd of A and B. The bound is taken from
      http://arxiv.org/abs/cs/0206032v1
   */
   bits1 = FLINT_ABS(_fmpz_vec_max_bits(A, len1));
   bits2 = FLINT_ABS(_fmpz_vec_max_bits(B, len2));
   max_bits = FLINT_MAX(bits1, bits2);

   bound_bits = FLINT_MIN(bits1, bits2) + 6;
   pack_bits = FLINT_MAX(bound_bits, max_bits); /* need to pack original polys */
   pack_limbs = (pack_bits - 1)/FLINT_BITS + 1;

   if (pack_bits >= 32) /* pack into multiples of limbs if >= 32 bits */
      pack_bits = FLINT_BITS*pack_limbs;

   /* allocate space to pack into */
   limbs1 = (pack_bits*len1 - 1)/FLINT_BITS + 1;
   limbs2 = (pack_bits*len2 - 1)/FLINT_BITS + 1;
   array1 = flint_calloc(limbs1, sizeof(mp_limb_t));
   array2 = flint_calloc(limbs2, sizeof(mp_limb_t));
   arrayg = flint_calloc(limbs2, sizeof(mp_limb_t));

   /* pack first poly and normalise */
   sign1 = (long) fmpz_sgn(A + len1 - 1);
   _fmpz_poly_bit_pack(array1, A, len1, pack_bits, sign1);
   while (array1[limbs1 - 1] == 0) limbs1--;

   /* pack second poly and normalise */
   sign2 = (long) fmpz_sgn(B + len2 - 1);
   _fmpz_poly_bit_pack(array2, B, len2, pack_bits, sign2);
   while (array2[limbs2 - 1] == 0) limbs2--;

   /* compute integer GCD */
   limbsg = mpn_gcd_full(arrayg, array1, limbs1, array2, limbs2);

   /*
      Make space for unpacked gcd. May have one extra coeff due to
      1 0 -x being packed as 0 -1 -x.
   */
   glen = FLINT_MIN((limbsg*FLINT_BITS)/pack_bits + 1, len2);
   G = _fmpz_vec_init(glen);

   /* unpack gcd */
   _fmpz_poly_bit_unpack(G, glen, arrayg, pack_bits, 0);
   while (G[glen - 1] == 0) glen--;

   /* divide by any content */
   fmpz_init(gc);
   _fmpz_poly_content(gc, G, glen);

   if (!fmpz_is_one(gc))
      limbsg = mpn_tdiv_q_fmpz_inplace(arrayg, limbsg, gc);

   /* make space for quotient and remainder of first poly by gcd */
   qlimbs = limbs1 - limbsg + 1;
   qlen = FLINT_MIN(len1, (qlimbs*FLINT_BITS)/pack_bits + 1);
   qlimbs = (qlen*pack_bits - 1)/FLINT_BITS + 1;
   q = flint_calloc(qlimbs, sizeof(mp_limb_t));
   temp = flint_malloc(limbsg*sizeof(mp_limb_t));

   divides = 0;

   if (mpn_divides(q, array1, limbs1, arrayg, limbsg, temp))
   {
      /* unpack quotient of first poly by gcd */
      Q = _fmpz_vec_init(len1);
      t = _fmpz_vec_init(len1 + glen);
      _fmpz_poly_bit_unpack(Q, qlen, q, pack_bits, 0);
      while (Q[qlen - 1] == 0) qlen--;

      /* divide by content */
      _fmpz_vec_scalar_divexact_fmpz(G, G, glen, gc);

      /* check if we really need to multiply out to check for exact quotient */
      bits_G = FLINT_ABS(_fmpz_vec_max_bits(G, glen));
      bits_Q = FLINT_ABS(_fmpz_vec_max_bits(Q, qlen));
      log_glen = FLINT_BIT_COUNT(glen);
      log_length = FLINT_MIN(log_glen, FLINT_BIT_COUNT(qlen));

      divides = (bits_G + bits_Q + log_length < pack_bits);

      if (!divides) /* need to multiply out to check exact quotient */
         divides = multiplies_out(A, len1, Q, qlen, G, glen, sign1, t);

      if (divides) /* quotient really was exact */
      {
         mpn_zero(q, qlimbs);

         if (mpn_divides(q, array2, limbs2, arrayg, limbsg, temp))
         {
            /* unpack quotient of second poly by gcd */
            qlimbs = limbs2 - limbsg + 1;
            qlen = FLINT_MIN(len2, (qlimbs*FLINT_BITS - 1)/pack_bits + 1);
            _fmpz_poly_bit_unpack(Q, qlen, q, pack_bits, 0);
            while (Q[qlen - 1] == 0) qlen--;

            /* check if we really need to multiply out to check for exact quotient */
            bits_Q = FLINT_ABS(_fmpz_vec_max_bits(Q, qlen));
            log_length = FLINT_MIN(log_glen, FLINT_BIT_COUNT(qlen));
            divides = (bits_G + bits_Q + log_length < pack_bits);

            /* we need to multiply out; array2 was packed with sign2 */
            if (!divides)
               divides = multiplies_out(B, len2, Q, qlen, G, glen, sign2, t);
         }
         else
         {
            /* the packed G does not divide the packed B, so G cannot
               divide B either */
            divides = 0;
         }
      }

      _fmpz_vec_clear(t, len1 + glen);
      _fmpz_vec_clear(Q, len1);
   }

   flint_free(q);
   flint_free(temp);
   flint_free(arrayg);
   flint_free(array1);
   flint_free(array2);
   fmpz_clear(gc);

   _fmpz_vec_clear(A, len1);
   _fmpz_vec_clear(B, len2);

   /* we found the gcd, so multiply by content */
   if (divides)
   {
      _fmpz_vec_zero(res + glen, len2 - glen);
      _fmpz_vec_scalar_mul_fmpz(res, G, glen, d);
   }

   fmpz_clear(d);
   _fmpz_vec_clear(G, glen);

   return divides;
}
int main(void) { int i, result; flint_rand_t state; flint_randinit(state); printf("rem...."); fflush(stdout); /* Check result of rem */ for (i = 0; i < 1000; i++) { nmod_poly_t a, b, q, r, prod; mp_limb_t n; do n = n_randtest_not_zero(state); while (!n_is_probabprime(n)); nmod_poly_init(a, n); nmod_poly_init(b, n); nmod_poly_init(q, n); nmod_poly_init(r, n); nmod_poly_init(prod, n); nmod_poly_randtest(a, state, n_randint(state, 2000)); do nmod_poly_randtest(b, state, n_randint(state, 2000)); while (b->length == 0); nmod_poly_div(q, a, b); nmod_poly_rem(r, a, b); nmod_poly_mul(prod, q, b); nmod_poly_add(prod, prod, r); result = (nmod_poly_equal(a, prod)); if (!result) { printf("FAIL:\n"); nmod_poly_print(a), printf("\n\n"); nmod_poly_print(prod), printf("\n\n"); nmod_poly_print(q), printf("\n\n"); nmod_poly_print(r), printf("\n\n"); printf("n = %ld\n", n); abort(); } nmod_poly_clear(a); nmod_poly_clear(b); nmod_poly_clear(q); nmod_poly_clear(r); nmod_poly_clear(prod); } /* Check aliasing of a and r */ for (i = 0; i < 1000; i++) { nmod_poly_t a, b, r; mp_limb_t n; do n = n_randtest(state); while (!n_is_probabprime(n)); nmod_poly_init(a, n); nmod_poly_init(b, n); nmod_poly_init(r, n); nmod_poly_randtest(a, state, n_randint(state, 2000)); do nmod_poly_randtest(b, state, n_randint(state, 2000)); while (b->length == 0); nmod_poly_rem(r, a, b); nmod_poly_rem(a, a, b); result = (nmod_poly_equal(a, r)); if (!result) { printf("FAIL:\n"); nmod_poly_print(a), printf("\n\n"); nmod_poly_print(b), printf("\n\n"); nmod_poly_print(r), printf("\n\n"); printf("n = %ld\n", n); abort(); } nmod_poly_clear(a); nmod_poly_clear(b); nmod_poly_clear(r); } /* Check aliasing of b and r */ for (i = 0; i < 1000; i++) { nmod_poly_t a, b, r; mp_limb_t n; do n = n_randtest(state); while (!n_is_probabprime(n)); nmod_poly_init(a, n); nmod_poly_init(b, n); nmod_poly_init(r, n); nmod_poly_randtest(a, state, n_randint(state, 2000)); do nmod_poly_randtest(b, state, n_randint(state, 2000)); while 
(b->length == 0); nmod_poly_rem(r, a, b); nmod_poly_rem(b, a, b); result = (nmod_poly_equal(b, r)); if (!result) { printf("FAIL:\n"); nmod_poly_print(a), printf("\n\n"); nmod_poly_print(b), printf("\n\n"); nmod_poly_print(r), printf("\n\n"); printf("n = %ld\n", n); abort(); } nmod_poly_clear(a); nmod_poly_clear(b); nmod_poly_clear(r); } /* Check result of rem_q1 */ for (i = 0; i < 5000; i++) { nmod_poly_t a, b, q0, r0, r; mp_limb_t n = n_randprime(state, n_randint(state,FLINT_BITS-1)+2, 0); nmod_poly_init(a, n); nmod_poly_init(b, n); nmod_poly_init(q0, n); nmod_poly_init(r0, n); nmod_poly_init(r, n); do nmod_poly_randtest(a, state, n_randint(state, 1000)); while (a->length < 2); nmod_poly_fit_length(b, a->length - 1); mpn_zero(b->coeffs, a->length - 1); nmod_poly_randtest_not_zero(b, state, n_randint(state, 1000) + 1); do b->coeffs[a->length - 2] = n_randint(state, n); while (b->coeffs[a->length - 2] == 0); b->length = a->length - 1; nmod_poly_divrem(q0, r0, a, b); nmod_poly_rem(r, a, b); result = (nmod_poly_equal(r0, r)); if (!result) { printf("FAIL:\n"); nmod_poly_print(a), printf("\n\n"); nmod_poly_print(b), printf("\n\n"); nmod_poly_print(q0), printf("\n\n"); nmod_poly_print(r0), printf("\n\n"); nmod_poly_print(r), printf("\n\n"); printf("n = %ld\n", n); abort(); } nmod_poly_clear(a); nmod_poly_clear(b); nmod_poly_clear(q0); nmod_poly_clear(r0); nmod_poly_clear(r); } flint_randclear(state); printf("PASS\n"); return 0; }
/*
    Sets f from h to n terms, choosing between the monomial routine, the
    basecase routine (n below NMOD_NEWTON_EXP_CUTOFF2) and
    _nmod_poly_exp_series.

    Aborts if h has a nonzero constant term.  For n <= 1 or empty h the
    result is zero when n == 0 and the constant 1 otherwise.  In the general
    path a short h is copied into a zero-padded temporary of length n, and
    when f aliases h with at least n coefficients the output goes to a
    temporary polynomial that is swapped into f afterwards.
*/
void
nmod_poly_exp_series(nmod_poly_t f, const nmod_poly_t h, long n)
{
    mp_ptr dst, src;
    nmod_poly_t tmp;
    long hlen, lead;
    int padded, via_tmp;

    nmod_poly_fit_length(f, n);
    hlen = h->length;

    if (hlen > 0 && h->coeffs[0] != 0UL)
    {
        printf("Exception: nmod_poly_exp_series: constant term != 0\n");
        abort();
    }

    if (n <= 1 || hlen == 0)
    {
        if (n != 0)
        {
            f->coeffs[0] = 1UL;
            f->length = 1;
        }
        else
        {
            nmod_poly_zero(f);
        }
        return;
    }

    /* Handle monomials */
    lead = 0;
    while (h->coeffs[lead] == 0UL && lead < n - 1)
        lead++;

    if (lead == hlen - 1 || lead == n - 1)
    {
        hlen = FLINT_MIN(hlen, n);
        _nmod_poly_exp_series_monomial_ui(f->coeffs,
            h->coeffs[hlen-1], hlen - 1, n, f->mod);
        f->length = n;
        _nmod_poly_normalise(f);
        return;
    }

    if (n < NMOD_NEWTON_EXP_CUTOFF2)
    {
        _nmod_poly_exp_series_basecase(f->coeffs, h->coeffs, hlen, n, f->mod);
        f->length = n;
        _nmod_poly_normalise(f);
        return;
    }

    padded = (hlen < n);
    via_tmp = (h == f && hlen >= n);

    if (padded)
    {
        src = _nmod_vec_init(n);
        mpn_copyi(src, h->coeffs, hlen);
        mpn_zero(src + hlen, n - hlen);
    }
    else
    {
        src = h->coeffs;
    }

    if (via_tmp)
    {
        nmod_poly_init2(tmp, h->mod.n, n);
        dst = tmp->coeffs;
    }
    else
    {
        nmod_poly_fit_length(f, n);
        dst = f->coeffs;
    }

    _nmod_poly_exp_series(dst, src, n, f->mod);

    if (via_tmp)
    {
        nmod_poly_swap(f, tmp);
        nmod_poly_clear(tmp);
    }

    f->length = n;

    if (padded)
        _nmod_vec_free(src);

    _nmod_poly_normalise(f);
}
/*
    Wrapper around _fmpz_mod_poly_radix: fills B[0], ..., B[N] from F, where
    N = deg(F) / D->degR.

    For N == 0, B[0] is simply set to F.  Otherwise F is padded with zeros
    to lenG = 2^k * degR coefficients (k the bit count of N), the output
    pointer array is extended by t extra blocks of degR coefficients each,
    and after the call every B[i] is given length degR and normalised.
*/
void
fmpz_mod_poly_radix(fmpz_mod_poly_struct **B, const fmpz_mod_poly_t F, const fmpz_mod_poly_radix_t D)
{
    const long lenF = F->length;
    const long degF = F->length - 1;
    const long degR = D->degR;
    const long N    = degF / degR;

    if (N == 0)
    {
        fmpz_mod_poly_set(B[0], F);
        return;
    }

    {
        const long k    = FLINT_BIT_COUNT(N);        /* k := ceil{log{N+1}}  */
        const long lenG = (1L << k) * degR;          /* Padded size          */
        const long t    = (lenG - 1) / degR - N;     /* Extra {degR}-blocks  */
        const int padded = (lenF < lenG);

        fmpz *G;                                     /* Padded copy of F     */
        fmpz *T;                                     /* Additional B[i]      */
        fmpz **C;                                    /* Enlarged version of B*/
        fmpz *W;                                     /* Temporary space      */
        long i;

        if (!padded)  /* lenF == lenG */
        {
            G = F->coeffs;
            T = NULL;
        }
        else
        {
            G = flint_malloc(lenG * sizeof(fmpz));

            /* Shallow copy of the fmpz words, then a word-wise zero tail. */
            for (i = 0; i < lenF; i++)
                G[i] = F->coeffs[i];
            mpn_zero((mp_ptr) G + lenF, lenG - lenF);

            T = t ? _fmpz_vec_init(t * degR) : NULL;
        }

        C = flint_malloc((N + 1 + t) * sizeof(fmpz *));

        for (i = 0; i <= N; i++)
        {
            fmpz_mod_poly_fit_length(B[i], degR);
            C[i] = B[i]->coeffs;
        }
        for (i = 0; i < t; i++)
            C[N + 1 + i] = T + i * degR;

        W = _fmpz_vec_init(lenG);
        _fmpz_mod_poly_radix(C, G, D->Rpow, D->Rinv, degR, 0, k-1, W, &(F->p));
        _fmpz_vec_clear(W, lenG);

        for (i = 0; i <= N; i++)
        {
            _fmpz_mod_poly_set_length(B[i], degR);
            _fmpz_mod_poly_normalise(B[i]);
        }

        flint_free(C);

        /* G is a shallow copy, so only the block itself is released. */
        if (padded)
            flint_free(G);

        if (t)
            _fmpz_vec_clear(T, t * degR);
    }
}