/*
    Writes n coefficients of the series reversion of Q into Qinv, using
    the Lagrange-style recurrence below.

    Qinv[0] is set to 0 and Qinv[1] to the inverse of Q[1] modulo mod.n.
    For i >= 2, Qinv[i] is coefficient i - 1 of the i-th power of
    base = inv_series(Q / x), divided by i (all arithmetic modulo mod.n).

    Qinv must have room for n coefficients; Q is read from index 1 up to
    index n - 1.
*/
void
_nmod_poly_revert_series_lagrange(mp_ptr Qinv, mp_srcptr Q, long n, nmod_t mod)
{
    mp_ptr base, power, next, hold;
    long k;

    if (n >= 1)
        Qinv[0] = 0UL;

    if (n >= 2)
        Qinv[1] = n_invmod(Q[1], mod.n);

    /* The first two coefficients are all that is needed for n <= 2. */
    if (n <= 2)
        return;

    base = _nmod_vec_init(n - 1);
    power = _nmod_vec_init(n - 1);
    next = _nmod_vec_init(n - 1);

    /* base = 1 / (Q / x) truncated to n - 1 terms; power starts as base^1. */
    _nmod_poly_inv_series(base, Q + 1, n - 1, mod);
    _nmod_vec_set(power, base, n - 1);

    k = 2;
    while (k < n)
    {
        /* next = base^k (truncated); power currently holds base^(k-1). */
        _nmod_poly_mullow(next, power, n - 1, base, n - 1, n - 1, mod);
        Qinv[k] = nmod_div(next[k - 1], k, mod);

        /* Swap the buffers so that power holds base^k for the next round. */
        hold = power;
        power = next;
        next = hold;

        k++;
    }

    _nmod_vec_clear(base);
    _nmod_vec_clear(power);
    _nmod_vec_clear(next);
}
/*
    Copies the A->r * A->c entries of A into B.

    When the modulus of A is larger than the modulus of B the entries are
    passed through _nmod_vec_reduce with B's modulus; otherwise they are
    copied unchanged. The number of entries copied is taken from A only.
*/
void
nmod_mat_set(nmod_mat_t B, const nmod_mat_t A)
{
    /* Entries of A may exceed B's modulus: reduce while copying. */
    if (A->mod.n > B->mod.n)
    {
        _nmod_vec_reduce(B->entries, A->entries, A->r*A->c, B->mod);
        return;
    }

    /* A's modulus does not exceed B's, so a plain copy is used. */
    _nmod_vec_set(B->entries, A->entries, A->r*A->c);
}
/*
    Sets res to the product of poly1 and poly2 taken modulo f.

    - f of length 0 prints a message and aborts.
    - f of length 1, or either factor of length 0, gives the zero polynomial.
    - If len1 + len2 - lenf <= 0 the plain product from nmod_poly_mul is
      stored and no reduction step is run.
    - Otherwise _nmod_poly_mulmod writes lenf - 1 coefficients into res,
      which is then normalised.

    res may be the same object as f: the coefficients of f are copied to a
    temporary buffer before res is written.
*/
void
nmod_poly_mulmod(nmod_poly_t res, const nmod_poly_t poly1, const nmod_poly_t poly2, const nmod_poly_t f)
{
    long lenf = f->length;
    long len1 = poly1->length;
    long len2 = poly2->length;
    mp_ptr fcoeffs;
    int f_is_res;

    if (lenf == 0)
    {
        printf("Exception: nmod_poly_mulmod: divide by zero\n");
        abort();
    }

    if (lenf == 1 || len1 == 0 || len2 == 0)
    {
        nmod_poly_zero(res);
        return;
    }

    /* Product is too short to need the reduction path. */
    if (len1 + len2 - lenf <= 0)
    {
        nmod_poly_mul(res, poly1, poly2);
        return;
    }

    /* Writing into res would clobber f when they alias, so snapshot f. */
    f_is_res = (f == res);
    if (f_is_res)
    {
        fcoeffs = flint_malloc(sizeof(mp_limb_t) * lenf);
        _nmod_vec_set(fcoeffs, f->coeffs, lenf);
    }
    else
    {
        fcoeffs = f->coeffs;
    }

    nmod_poly_fit_length(res, lenf - 1);
    _nmod_poly_mulmod(res->coeffs, poly1->coeffs, len1,
                      poly2->coeffs, len2, fcoeffs, lenf, res->mod);

    if (f_is_res)
        flint_free(fcoeffs);

    res->length = lenf - 1;
    _nmod_poly_normalise(res);
}
/*
    Writes n coefficients of the series tanh(h) into f.

    With E = exp(2h), the result is computed as (E - 1) / (E + 1): the
    numerator is E with its constant coefficient replaced by 0 and the
    denominator is E with its constant coefficient replaced by 2.
    This presumes the constant coefficient of E is 1, i.e. h[0] == 0 --
    TODO confirm against callers.

    NOTE(review): buffers are released with _nmod_vec_free here, whereas
    neighbouring routines use _nmod_vec_clear; confirm which name the
    library version in use provides.
*/
void
_nmod_poly_tanh_series(mp_ptr f, mp_srcptr h, long n, nmod_t mod)
{
    mp_ptr numer, denom;

    numer = _nmod_vec_init(n);
    denom = _nmod_vec_init(n);

    /* numer temporarily holds 2h, then denom = exp(2h). */
    _nmod_vec_add(numer, h, h, n, mod);
    _nmod_poly_exp_series(denom, numer, n, mod);

    /* numer = exp(2h) with constant term 0, denom = exp(2h) with constant term 2. */
    _nmod_vec_set(numer, denom, n);
    denom[0] = 2UL;
    numer[0] = 0UL;

    _nmod_poly_div_series(f, numer, denom, n, mod);

    _nmod_vec_free(numer);
    _nmod_vec_free(denom);
}
/*
    Sets {rop, 2*d - 1} to {op, len} raised to the power e, reduced via
    _fq_nmod_reduce, where d is the degree reported by the context.

    rop is used as working space of 2*d - 1 limbs. For e == 0 the result
    is 1 and for e == 1 it is a zero-padded copy of op. Otherwise a
    left-to-right square-and-multiply over the bits of e is run, ping-
    ponging between rop and one temporary buffer.
*/
void
_fq_nmod_pow(mp_limb_t *rop, const mp_limb_t *op, slong len, const fmpz_t e, const fq_nmod_ctx_t ctx)
{
    const slong d = fq_nmod_ctx_degree(ctx);
    const slong width = 2 * d - 1;

    if (fmpz_is_zero(e))
    {
        rop[0] = WORD(1);
        _nmod_vec_zero(rop + 1, width - 1);
        return;
    }

    if (fmpz_is_one(e))
    {
        _nmod_vec_set(rop, op, len);
        _nmod_vec_zero(rop + len, width - len);
        return;
    }

    {
        mp_limb_t *scratch = _nmod_vec_init(width);
        mp_limb_t *cur, *other, *hold;
        ulong top, k;
        int odd_swaps = 0;

        _nmod_vec_zero(scratch, width);
        _nmod_vec_zero(rop, width);

        /* Index of the bit one place below the most significant bit of e. */
        top = fmpz_bits(e) - 2;

        /*
           Dry run over the exponent bits, counting buffer swaps only, so
           that the starting buffers can be chosen to make the final
           result land in rop. A swap happens when the first bit is set
           and for every later bit that is clear.
         */
        if (fmpz_tstbit(e, top))
            odd_swaps = !odd_swaps;
        for (k = top; k > 0; k--)
        {
            if (!fmpz_tstbit(e, k - 1))
                odd_swaps = !odd_swaps;
        }

        if (odd_swaps)
        {
            cur = scratch;
            other = rop;
        }
        else
        {
            cur = rop;
            other = scratch;
        }

        /* First step, unrolled: it works on {op, len} rather than on cur. */
        _nmod_poly_mul(cur, op, len, op, len, ctx->mod);
        _fq_nmod_reduce(cur, 2 * len - 1, ctx);

        if (fmpz_tstbit(e, top))
        {
            _nmod_poly_mul(other, cur, d, op, len, ctx->mod);
            _fq_nmod_reduce(other, d + len - 1, ctx);
            hold = cur;
            cur = other;
            other = hold;
        }

        /* Remaining bits, from position top - 1 down to 0. */
        for (k = top; k > 0; k--)
        {
            /* Square into the spare buffer. */
            _nmod_poly_mul(other, cur, d, cur, d, ctx->mod);
            _fq_nmod_reduce(other, width, ctx);

            if (fmpz_tstbit(e, k - 1))
            {
                /* Bit set: multiply by op, landing back in cur (no swap). */
                _nmod_poly_mul(cur, other, d, op, len, ctx->mod);
                _fq_nmod_reduce(cur, d + len - 1, ctx);
            }
            else
            {
                /* Bit clear: the square is the new current value. */
                hold = cur;
                cur = other;
                other = hold;
            }
        }

        _nmod_vec_clear(scratch);
    }
}
/*
    Extended GCD of {A, lenA} and {B, lenB} modulo mod.n, built on
    _nmod_poly_hgcd with a switch to _nmod_poly_xgcd_euclidean once the
    current operand length falls below a cutoff.

    Outputs:
      G  receives the gcd; its length lenG is the return value.
      S  receives lenS coefficients and is zero-filled up to lenB - 1.
      T  receives lenT coefficients and is zero-filled up to lenA - 1.
    On the general path T is recomputed at the end as (G - S*A) / B.

    The helper macros __divrem, __set, __mul, __add, __sub and __div are
    defined outside this block. From their use here they appear to store
    the length of their result in the argument that follows the output
    pointer (e.g. lenq and lenr are only ever assigned through them) --
    confirm against the macro definitions.

    NOTE(review): presumably requires lenA >= lenB >= 1; nothing in this
    block checks it.
*/
slong _nmod_poly_xgcd_hgcd(mp_ptr G, mp_ptr S, mp_ptr T, mp_srcptr A, slong lenA, mp_srcptr B, slong lenB, nmod_t mod)
{
    /* Threshold on lenj below which the Euclidean routine takes over;
       a different constant is used when the modulus has at most 8 bits. */
    const slong cutoff = FLINT_BIT_COUNT(mod.n) <= 8 ? NMOD_POLY_SMALL_GCD_CUTOFF : NMOD_POLY_GCD_CUTOFF;
    slong lenG, lenS, lenT;

    if (lenB == 1)
    {
        /* B is a constant: G = B with cofactors S = 0 (empty), T = 1. */
        G[0] = B[0];
        T[0] = 1;
        lenG = 1;
        lenS = 0;
        lenT = 1;
    }
    else
    {
        /* q and r share one allocation: r starts lenA limbs into q. */
        mp_ptr q = _nmod_vec_init(lenA + lenB);
        mp_ptr r = q + lenA;
        slong lenq, lenr;

        __divrem(q, lenq, r, lenr, A, lenA, B, lenB);

        if (lenr == 0)
        {
            /* Remainder of A by B is empty: G = B, S = 0 (empty), T = 1. */
            __set(G, lenG, B, lenB);
            T[0] = 1;
            lenS = 0;
            lenT = 1;
        }
        else
        {
            mp_ptr h, j, v, w, R[4], X;
            slong lenh, lenj, lenv, lenw, lenR[4];
            int sgnR;

            /* Size the work areas, then carve them all out of the single
               block X in the order h, j, v, w, R[0], R[1], R[2], R[3]. */
            lenh = lenj = lenB;
            lenv = lenw = lenA + lenB - 2;
            lenR[0] = lenR[1] = lenR[2] = lenR[3] = (lenB + 1) / 2;

            X = _nmod_vec_init(2 * lenh + 2 * lenv + 4 * lenR[0]);
            h = X;
            j = h + lenh;
            v = j + lenj;
            w = v + lenv;
            R[0] = w + lenw;
            R[1] = R[0] + lenR[0];
            R[2] = R[1] + lenR[1];
            R[3] = R[2] + lenR[2];

            /* First half-gcd step on (B, r); fills R[0..3], h and j and
               updates lenR, lenh and lenj. */
            sgnR = _nmod_poly_hgcd(R, lenR, h, &lenh, j, &lenj, B, lenB, r, lenr, mod);

            /* Initialise S and T from R[1] and R[0]; which of the two is
               negated depends on the sign returned by the hgcd call. */
            if (sgnR > 0)
            {
                _nmod_vec_neg(S, R[1], lenR[1], mod);
                _nmod_vec_set(T, R[0], lenR[0]);
            }
            else
            {
                _nmod_vec_set(S, R[1], lenR[1]);
                _nmod_vec_neg(T, R[0], lenR[0], mod);
            }
            lenS = lenR[1];
            lenT = lenR[0];

            while (lenj != 0)
            {
                /* One division step on (h, j), then update the pair:
                   (S, T) <- (T, S - q*T), done as swap followed by subtract. */
                __divrem(q, lenq, r, lenr, h, lenh, j, lenj);
                __mul(v, lenv, q, lenq, T, lenT);
                {
                    slong l;
                    _nmod_vec_swap(S, T, FLINT_MAX(lenS, lenT));
                    l = lenS;
                    lenS = lenT;
                    lenT = l;
                }
                __sub(T, lenT, T, lenT, v, lenv);

                if (lenr == 0)
                {
                    /* j divides h exactly: j is the gcd. */
                    __set(G, lenG, j, lenj);
                    goto cofactor;
                }
                if (lenj < cutoff)
                {
                    /* Below the cutoff: finish with the Euclidean xgcd on
                       (j, r). R[0] and R[1] are reused as storage for its
                       cofactors u0, u1, which are then combined into S. */
                    mp_ptr u0 = R[0], u1 = R[1];
                    slong lenu0 = lenr - 1, lenu1 = lenj - 1;

                    lenG = _nmod_poly_xgcd_euclidean(G, u0, u1, j, lenj, r, lenr, mod);
                    MPN_NORM(u0, lenu0);
                    MPN_NORM(u1, lenu1);

                    __mul(v, lenv, S, lenS, u0, lenu0);
                    __mul(w, lenw, T, lenT, u1, lenu1);
                    __add(S, lenS, v, lenv, w, lenw);
                    goto cofactor;
                }

                /* Another half-gcd step, now on (j, r). */
                sgnR = _nmod_poly_hgcd(R, lenR, h, &lenh, j, &lenj, j,lenj, r, lenr, mod);

                /* Apply the new R to (S, T). v and w are formed from the
                   old S and T before either is overwritten; q is reused
                   as scratch for the remaining products. */
                __mul(v, lenv, R[1], lenR[1], T, lenT);
                __mul(w, lenw, R[2], lenR[2], S, lenS);
                __mul(q, lenq, S, lenS, R[3], lenR[3]);
                if (sgnR > 0)
                    __sub(S, lenS, q, lenq, v, lenv);
                else
                    __sub(S, lenS, v, lenv, q, lenq);
                __mul(q, lenq, T, lenT, R[0], lenR[0]);
                if (sgnR > WORD(0))
                    __sub(T, lenT, q, lenq, w, lenw);
                else
                    __sub(T, lenT, w, lenw, q, lenq);
            }
            /* Loop ended with lenj == 0: h holds the gcd. */
            __set(G, lenG, h, lenh);

            /* Rebuild T from S: T = (G - S*A) / B. */
            cofactor:
            __mul(v, lenv, S, lenS, A, lenA);
            __sub(w, lenw, G, lenG, v, lenv);
            __div(T, lenT, w, lenw, B, lenB);

            _nmod_vec_clear(X);
        }
        _nmod_vec_clear(q);
    }

    /* Zero-fill the unused tails of S (up to lenB - 1) and T (up to lenA - 1). */
    flint_mpn_zero(S + lenS, lenB - 1 - lenS);
    flint_mpn_zero(T + lenT, lenA - 1 - lenT);

    return lenG;
}
/*
    Composition of poly1 with poly2 modulo poly3, written to res, using a
    block (baby-step / giant-step) scheme with matrix multiplication.

    Let n = len3 - 1 and m = n_sqrt(n) + 1.
      - len3 == 1: nothing is written.
      - len1 == 1: res[0] = poly1[0].
      - len3 == 2: res[0] is poly1 evaluated at poly2[0].
      - otherwise: poly1 is cut into rows of m coefficients (matrix B),
        rows of A hold successive powers of poly2 modulo poly3, C = B * A
        and the rows of C are combined with a Horner scheme in
        poly2^m mod poly3. n coefficients are written to res.

    poly2 is read as n coefficients.
*/
void
_nmod_poly_compose_mod_brent_kung(mp_ptr res, mp_srcptr poly1, long len1, mp_srcptr poly2, mp_srcptr poly3, long len3, nmod_t mod)
{
    nmod_mat_t A, B, C;
    mp_ptr step, prod;
    long row, n, m;

    if (len3 == 1)
        return;

    if (len1 == 1)
    {
        res[0] = poly1[0];
        return;
    }

    if (len3 == 2)
    {
        res[0] = _nmod_poly_evaluate_nmod(poly1, len1, poly2[0], mod);
        return;
    }

    n = len3 - 1;
    m = n_sqrt(n) + 1;

    nmod_mat_init(A, m, n, mod.n);
    nmod_mat_init(B, m, m, mod.n);
    nmod_mat_init(C, m, n, mod.n);

    step = _nmod_vec_init(n);
    prod = _nmod_vec_init(n);

    /* Rows of B: consecutive length-m segments of poly1 ... */
    row = 0;
    while (row < len1 / m)
    {
        _nmod_vec_set(B->rows[row], poly1 + row*m, m);
        row++;
    }
    /* ... followed by the remaining len1 % m coefficients. */
    _nmod_vec_set(B->rows[row], poly1 + row*m, len1 % m);

    /* Rows of A: poly2^0, poly2^1, ..., poly2^(m-1), each modulo poly3. */
    A->rows[0][0] = 1UL;
    _nmod_vec_set(A->rows[1], poly2, n);
    for (row = 2; row < m; row++)
    {
        _nmod_poly_mulmod(A->rows[row], A->rows[row - 1], n,
                          poly2, n, poly3, len3, mod);
    }

    nmod_mat_mul(C, B, A);

    /* Horner evaluation over the rows of C, with step = poly2^m mod poly3. */
    _nmod_vec_set(res, C->rows[m - 1], n);
    _nmod_poly_mulmod(step, A->rows[m - 1], n, poly2, n, poly3, len3, mod);

    row = m - 1;
    while (row-- > 0)
    {
        _nmod_poly_mulmod(prod, res, n, step, n, poly3, len3, mod);
        _nmod_poly_add(res, prod, n, C->rows[row], n, mod);
    }

    _nmod_vec_clear(step);
    _nmod_vec_clear(prod);

    nmod_mat_clear(A);
    nmod_mat_clear(B);
    nmod_mat_clear(C);
}