void _nmod_poly_mulmod(mp_ptr res, mp_srcptr poly1, long len1, mp_srcptr poly2, long len2, mp_srcptr f, long lenf, nmod_t mod) { mp_ptr T, Q; long lenT, lenQ; lenT = len1 + len2 - 1; lenQ = lenT - lenf + 1; T = _nmod_vec_init(lenT + lenQ); Q = T + lenT; if (len1 >= len2) _nmod_poly_mul(T, poly1, len1, poly2, len2, mod); else _nmod_poly_mul(T, poly2, len2, poly1, len1, mod); _nmod_poly_divrem(Q, res, T, lenT, f, lenf, mod); _nmod_vec_clear(T); }
void _nmod_poly_product_roots_nmod_vec(mp_ptr poly, mp_srcptr xs, slong n, nmod_t mod) { if (n == 0) { poly[0] = UWORD(1); } else if (n < 20) { slong i, j; poly[n] = UWORD(1); poly[n - 1] = nmod_neg(xs[0], mod); for (i = 1; i < n; i++) { poly[n-i-1] = nmod_neg(n_mulmod2_preinv(poly[n-i], xs[i], mod.n, mod.ninv), mod); for (j = 0; j < i - 1; j++) { poly[n-i+j] = nmod_sub(poly[n-i+j], n_mulmod2_preinv(poly[n-i+j+1], xs[i], mod.n, mod.ninv), mod); } poly[n-1] = nmod_sub(poly[n-1], xs[i], mod); } } else { const slong m = (n + 1) / 2; mp_ptr tmp; tmp = _nmod_vec_init(n + 2); _nmod_poly_product_roots_nmod_vec(tmp, xs, m, mod); _nmod_poly_product_roots_nmod_vec(tmp + m + 1, xs + m, n - m, mod); _nmod_poly_mul(poly, tmp, m + 1, tmp + m + 1, n - m + 1, mod); _nmod_vec_clear(tmp); } }
void _nmod_poly_compose_series_horner(mp_ptr res, mp_srcptr poly1, long len1, mp_srcptr poly2, long len2, long n, nmod_t mod) { if (n == 1) { res[0] = poly1[0]; } else { long i = len1 - 1; long lenr; mp_ptr t = _nmod_vec_init(n); lenr = len2; _nmod_vec_scalar_mul_nmod(res, poly2, len2, poly1[i], mod); i--; res[0] = nmod_add(res[0], poly1[i], mod); while (i > 0) { i--; if (lenr + len2 - 1 < n) { _nmod_poly_mul(t, res, lenr, poly2, len2, mod); lenr = lenr + len2 - 1; } else { _nmod_poly_mullow(t, res, lenr, poly2, len2, n, mod); lenr = n; } _nmod_poly_add(res, t, lenr, poly1 + i, 1, mod); } _nmod_vec_zero(res + lenr, n - lenr); _nmod_vec_clear(t); } }
void _fq_nmod_pow(mp_limb_t *rop, const mp_limb_t *op, slong len, const fmpz_t e, const fq_nmod_ctx_t ctx) { const slong d = fq_nmod_ctx_degree(ctx); if (fmpz_is_zero(e)) { rop[0] = WORD(1); _nmod_vec_zero(rop + 1, 2 * d - 1 - 1); } else if (fmpz_is_one(e)) { _nmod_vec_set(rop, op, len); _nmod_vec_zero(rop + len, 2 * d - 1 - len); } else { ulong bit; mp_limb_t *v = _nmod_vec_init(2 * d - 1); mp_limb_t *R, *S, *T; _nmod_vec_zero(v, 2 * d - 1); _nmod_vec_zero(rop, 2 * d - 1); /* Set bits to the bitmask with a 1 one place lower than the msb of e */ bit = fmpz_bits(e) - 2; /* Trial run without any polynomial arithmetic to determine the parity of the number of swaps; then set R and S accordingly */ { unsigned int swaps = 0U; ulong bit2 = bit; if (fmpz_tstbit(e, bit2)) swaps = ~swaps; while (bit2--) if (!fmpz_tstbit(e, bit2)) swaps = ~swaps; if (swaps == 0U) { R = rop; S = v; } else { R = v; S = rop; } } /* We unroll the first step of the loop, referring to {op, len} */ _nmod_poly_mul(R, op, len, op, len, ctx->mod); _fq_nmod_reduce(R, 2 * len - 1, ctx); if (fmpz_tstbit(e, bit)) { _nmod_poly_mul(S, R, d, op, len, ctx->mod); _fq_nmod_reduce(S, d + len - 1, ctx); T = R; R = S; S = T; } while (bit--) { if (fmpz_tstbit(e, bit)) { _nmod_poly_mul(S, R, d, R, d, ctx->mod); _fq_nmod_reduce(S, 2 * d - 1, ctx); _nmod_poly_mul(R, S, d, op, len, ctx->mod); _fq_nmod_reduce(R, d + len - 1, ctx); } else { _nmod_poly_mul(S, R, d, R, d, ctx->mod); _fq_nmod_reduce(S, 2 * d - 1, ctx); T = R; R = S; S = T; } } _nmod_vec_clear(v); } }
void _nmod_poly_compose_divconquer(mp_ptr res, mp_srcptr poly1, long len1, mp_srcptr poly2, long len2, nmod_t mod) { long i, j, k, n; long * hlen, alloc, powlen; mp_ptr v, * h, pow, temp; if (len1 == 1) { res[0] = poly1[0]; return; } if (len2 == 1) { res[0] = _nmod_poly_evaluate_nmod(poly1, len1, poly2[0], mod); return; } if (len1 == 2) { _nmod_poly_compose_horner(res, poly1, len1, poly2, len2, mod); return; } /* Initialisation */ hlen = (long *) flint_malloc(((len1 + 1) / 2) * sizeof(long)); for (k = 1; (2 << k) < len1; k++) ; hlen[0] = hlen[1] = ((1 << k) - 1) * (len2 - 1) + 1; for (i = k - 1; i > 0; i--) { long hi = (len1 + (1 << i) - 1) / (1 << i); for (n = (hi + 1) / 2; n < hi; n++) hlen[n] = ((1 << i) - 1) * (len2 - 1) + 1; } powlen = (1 << k) * (len2 - 1) + 1; alloc = 0; for (i = 0; i < (len1 + 1) / 2; i++) alloc += hlen[i]; v = _nmod_vec_init(alloc + 2 * powlen); h = (mp_ptr *) flint_malloc(((len1 + 1) / 2) * sizeof(mp_ptr)); h[0] = v; for (i = 0; i < (len1 - 1) / 2; i++) { h[i + 1] = h[i] + hlen[i]; hlen[i] = 0; } hlen[(len1 - 1) / 2] = 0; pow = v + alloc; temp = pow + powlen; /* Let's start the actual work */ for (i = 0, j = 0; i < len1 / 2; i++, j += 2) { if (poly1[j + 1] != 0L) { _nmod_vec_scalar_mul_nmod(h[i], poly2, len2, poly1[j + 1], mod); h[i][0] = n_addmod(h[i][0], poly1[j], mod.n); hlen[i] = len2; } else if (poly1[j] != 0L) { h[i][0] = poly1[j]; hlen[i] = 1; } } if ((len1 & 1L)) { if (poly1[j] != 0L) { h[i][0] = poly1[j]; hlen[i] = 1; } } _nmod_poly_mul(pow, poly2, len2, poly2, len2, mod); powlen = 2 * len2 - 1; for (n = (len1 + 1) / 2; n > 2; n = (n + 1) / 2) { if (hlen[1] > 0) { long templen = powlen + hlen[1] - 1; _nmod_poly_mul(temp, pow, powlen, h[1], hlen[1], mod); _nmod_poly_add(h[0], temp, templen, h[0], hlen[0], mod); hlen[0] = FLINT_MAX(hlen[0], templen); } for (i = 1; i < n / 2; i++) { if (hlen[2*i + 1] > 0) { _nmod_poly_mul(h[i], pow, powlen, h[2*i + 1], hlen[2*i + 1], mod); hlen[i] = hlen[2*i + 1] + powlen - 1; } else hlen[i] = 0; _nmod_poly_add(h[i], h[i], hlen[i], h[2*i], hlen[2*i], mod); hlen[i] = FLINT_MAX(hlen[i], hlen[2*i]); } if ((n & 1L)) { mpn_copyi(h[i], h[2*i], hlen[2*i]); hlen[i] = hlen[2*i]; } _nmod_poly_mul(temp, pow, powlen, pow, powlen, mod); powlen += powlen - 1; { mp_ptr t = pow; pow = temp; temp = t; } } _nmod_poly_mul(res, pow, powlen, h[1], hlen[1], mod); _nmod_vec_add(res, res, h[0], hlen[0], mod); _nmod_vec_clear(v); flint_free(h); flint_free(hlen); }