/*
    Basecase polynomial division (quotient only) using two limbs of
    accumulator per remainder coefficient.

    Q      receives the A_len - B_len + 1 quotient coefficients.
    W      scratch space; the first 2*(B_len - 1) limbs hold a two-limb
           widened copy of the non-leading coefficients of B, the following
           2*(A_len - B_len + 1) limbs hold the widened top part of A.
    A, B   dividend and divisor coefficient arrays of lengths A_len, B_len.
    mod    modulus data (mod.n, mod.ninv are used).

    Reductions are delayed: products are accumulated into the two-limb
    slots with mpn_addmul_1 and only reduced when a coefficient is consumed.
*/
void _nmod_poly_div_basecase_2(mp_ptr Q, mp_ptr W, mp_srcptr A, long A_len,
                               mp_srcptr B, long B_len, nmod_t mod)
{
    const mp_limb_t lead_inv = n_invmod(B[B_len - 1], mod.n);
    const long low_len = B_len - 1;             /* coefficients of B below the leading one */
    const long quo_len = A_len - B_len + 1;     /* number of quotient coefficients */
    mp_ptr B2 = W;
    mp_ptr R2 = W + 2 * low_len;
    mp_srcptr Btop = B2 + 2 * low_len;          /* one past the widened copy of B */
    long coeff, i;

    /* Widen B (without its leading coefficient) to two limbs per entry. */
    for (i = 0; i < low_len; i++)
    {
        B2[2 * i] = B[i];
        B2[2 * i + 1] = 0;
    }

    /* Widen the part of A that influences the quotient. */
    for (i = 0; i < quo_len; i++)
    {
        R2[2 * i] = A[B_len + i - 1];
        R2[2 * i + 1] = 0;
    }

    for (coeff = A_len - B_len; coeff >= 0; coeff--)
    {
        const mp_limb_t r_coeff = n_ll_mod_preinv(R2[2 * coeff + 1],
                                      R2[2 * coeff], mod.n, mod.ninv);
        mp_limb_t c;
        long len;

        if (r_coeff == 0L)
        {
            Q[coeff] = 0L;
            continue;
        }

        Q[coeff] = n_mulmod2_preinv(r_coeff, lead_inv, mod.n, mod.ninv);
        c = n_negmod(Q[coeff], mod.n);

        /* Only the coefficients that still matter for the quotient are updated. */
        len = FLINT_MIN(B_len - 1, coeff);
        if (len > 0)
            mpn_addmul_1(R2 + 2 * (coeff - len), Btop - 2 * len, 2 * len, c);
    }
}
/*
    Computes a^exp modulo n by binary exponentiation, using the
    precomputed double-precision value npre with n_mulmod_precomp.

    a     base.
    exp   signed exponent; for a negative exponent the power a^|exp| is
          computed first and then inverted with n_invmod.
    n     modulus; the result is 0 when n == 1.
    npre  precomputed value passed through to n_mulmod_precomp.

    The magnitude of exp is taken in unsigned arithmetic.  Negating the
    most negative signed value is undefined behaviour in C, and a negative
    loop counter would never reach zero under an arithmetic right shift.
*/
mp_limb_t n_powmod_precomp(mp_limb_t a, mp_limb_signed_t exp, mp_limb_t n, double npre)
{
    mp_limb_t x, y, e;

    if (n == 1UL)
        return 0L;

    e = (exp < 0L) ? -(mp_limb_t) exp : (mp_limb_t) exp;

    x = 1UL;
    y = a;

    while (e)
    {
        if (e & 1UL)
            x = n_mulmod_precomp(x, y, n, npre);
        e >>= 1;
        /* Skip the final squaring, its result would never be used. */
        if (e)
            y = n_mulmod_precomp(y, y, n, npre);
    }

    return (exp < 0L ? n_invmod(x, n) : x);
}
/*
    Writes n coefficients of the series reversion of Q to Qinv.

    Qinv[0] is set to 0 and Qinv[1] to the inverse of Q[1].  For i >= 2,
    coefficient i is obtained as (coefficient i - 1 of R^i) / i, where R is
    the series computed by _nmod_poly_inv_series from Q shifted down by one
    coefficient, truncated to length n - 1.  The powers of R are built up
    one multiplication at a time.
*/
void _nmod_poly_revert_series_lagrange(mp_ptr Qinv, mp_srcptr Q, long n, nmod_t mod)
{
    long i;
    mp_ptr R, S, T, tmp;

    if (n < 1)
        return;
    Qinv[0] = 0UL;

    if (n < 2)
        return;
    Qinv[1] = n_invmod(Q[1], mod.n);

    if (n == 2)
        return;

    R = _nmod_vec_init(n - 1);
    S = _nmod_vec_init(n - 1);
    T = _nmod_vec_init(n - 1);

    _nmod_poly_inv_series(R, Q + 1, n - 1, mod);
    _nmod_vec_set(S, R, n - 1);

    for (i = 2; i < n; i++)
    {
        /* T = S * R, i.e. the next power of R; then swap the roles of S and T. */
        _nmod_poly_mullow(T, S, n - 1, R, n - 1, n - 1, mod);
        Qinv[i] = nmod_div(T[i - 1], i, mod);

        tmp = S;
        S = T;
        T = tmp;
    }

    _nmod_vec_clear(R);
    _nmod_vec_clear(S);
    _nmod_vec_clear(T);
}
/*
    For each of the s factor-base primes p selected by A_ind, computes
      A_modp[i]  = (A / p) mod p
      B_terms[i] = (A / p) * t, where t is sqrts[A_ind[i]] / A_modp[i] mod p,
                   replaced by p - t when t > p/2,
    and stores the sum of all B_terms in qs_inf->B.
*/
void qsieve_ll_compute_B_terms(qs_t qs_inf)
{
    const long s = qs_inf->s;
    mp_limb_t * A_ind = qs_inf->A_ind;
    mp_limb_t * A_modp = qs_inf->A_modp;
    mp_limb_t * B_terms = qs_inf->B_terms;
    prime_t * factor_base = qs_inf->factor_base;
    const mp_limb_t A = qs_inf->A;
    mp_limb_t B, p, pinv, cofactor, t;
    long i;

    for (i = 0; i < s; i++)
    {
        p = factor_base[A_ind[i]].p;
        pinv = factor_base[A_ind[i]].pinv;

        cofactor = A / p; /* TODO: possibly use precomputed inverse here */

        t = n_mod2_preinv(cofactor, p, pinv);
        A_modp[i] = t;

        t = n_invmod(t, p);
        t = n_mulmod2_preinv(t, qs_inf->sqrts[A_ind[i]], p, pinv);

        /* Pick the representative not exceeding p/2. */
        if (t > p/2)
            t = p - t;

        B_terms[i] = cofactor * t;
    }

    B = B_terms[0];
    for (i = 1; i < s; i++)
        B += B_terms[i];

    qs_inf->B = B;
}
/*
    Entrywise Chinese remaindering: combines mat1 (entries taken with
    modulus m1) and mat2 (entries modulo mat2->mod.n) into res by calling
    _fmpz_CRT_ui_precomp on every entry.

    The inverse of m1 modulo the word-sized modulus and the product of the
    two moduli are computed once up front.  The function prints a message
    and aborts when that inverse comes back as 0.  The sign flag is passed
    through unchanged to _fmpz_CRT_ui_precomp.
*/
void fmpz_mat_CRT_ui(fmpz_mat_t res, const fmpz_mat_t mat1, const fmpz_t m1,
                     const nmod_mat_t mat2, int sign)
{
    const mp_limb_t m2 = mat2->mod.n;
    const mp_limb_t m2inv = mat2->mod.ninv;
    mp_limb_t c;
    fmpz_t m1m2;
    long row, col;

    c = n_invmod(fmpz_fdiv_ui(m1, m2), m2);

    if (c == 0)
    {
        printf("Exception in fmpz_mat_CRT_ui: m1 not invertible modulo m2!\n");
        abort();
    }

    fmpz_init(m1m2);
    fmpz_mul_ui(m1m2, m1, m2);

    for (row = 0; row < mat1->r; row++)
    {
        for (col = 0; col < mat1->c; col++)
        {
            _fmpz_CRT_ui_precomp(fmpz_mat_entry(res, row, col),
                                 fmpz_mat_entry(mat1, row, col), m1,
                                 nmod_mat_entry(mat2, row, col), m2, m2inv,
                                 m1m2, c, sign);
        }
    }

    fmpz_clear(m1m2);
}
/*
    Basecase polynomial remainder with one limb of accumulator per
    coefficient.

    R      receives the lenB - 1 remainder coefficients (untouched when
           lenB <= 1).
    W      scratch space of at least lenA limbs; A is copied into it.
    A, B   dividend (length lenA) and divisor (length lenB).

    Products are added with mpn_addmul_1 without intermediate reduction;
    the low lenB - 1 entries are reduced once at the end.
*/
void _nmod_poly_rem_basecase_1(mp_ptr R, mp_ptr W, mp_srcptr A, long lenA,
                               mp_srcptr B, long lenB, nmod_t mod)
{
    mp_limb_t invL;
    mp_ptr R1 = W;
    long iR;

    if (lenB <= 1)
        return;

    invL = n_invmod(B[lenB - 1], mod.n);

    mpn_copyi(R1, A, lenA);

    for (iR = lenA - 1; iR >= lenB - 1; iR--)
    {
        mp_limb_t q, c;

        if (R1[iR] == 0)
            continue;

        q = n_mulmod2_preinv(R1[iR], invL, mod.n, mod.ninv);
        c = n_negmod(q, mod.n);

        /* Subtract q * B (without its leading term) from the running remainder. */
        mpn_addmul_1(R1 + iR - lenB + 1, B, lenB - 1, c);
    }

    _nmod_vec_reduce(R, R1, lenB - 1, mod);
}
/*
    Basecase polynomial division with remainder, one limb of accumulator
    per coefficient.

    Q      receives the lenA - lenB + 1 quotient coefficients.
    R      receives the lenB - 1 remainder coefficients (only when lenB > 1).
    W      scratch space of at least lenA limbs; A is copied into it.
    A, B   dividend (length lenA) and divisor (length lenB).
*/
void _nmod_poly_divrem_basecase_1(mp_ptr Q, mp_ptr R, mp_ptr W, mp_srcptr A, slong lenA,
                                  mp_srcptr B, slong lenB, nmod_t mod)
{
    const mp_limb_t invL = n_invmod(B[lenB - 1], mod.n);
    const slong shift = lenB - 1;   /* remainder index iR maps to quotient index iR - shift */
    mp_ptr R1 = W;
    slong iR;

    flint_mpn_copyi(R1, A, lenA);

    for (iR = lenA - 1; iR >= shift; iR--)
    {
        if (R1[iR] == 0)
        {
            Q[iR - shift] = WORD(0);
            continue;
        }

        Q[iR - shift] = n_mulmod2_preinv(R1[iR], invL, mod.n, mod.ninv);

        if (lenB > 1)
        {
            const mp_limb_t c = n_negmod(Q[iR - shift], mod.n);
            mpn_addmul_1(R1 + iR - lenB + 1, B, lenB - 1, c);
        }
    }

    if (lenB > 1)
        _nmod_vec_reduce(R, R1, lenB - 1, mod);
}
/*
    In-place Taylor shift of the length-len polynomial p by c, carried
    out with a single truncated product (_nmod_poly_mullow).

    Steps, with n = len - 1:
      1. scale p[i] by i! and reverse the coefficient order;
      2. build the kernel with entries n!/i!, multiplied by c^i
         (a sign flip on odd i when c == mod.n - 1, nothing when c == 1);
      3. multiply the two length-len vectors, truncated to length len;
      4. read the product back in reverse, scaling entry i by 1/(n! i!).

    Nothing is done when c == 0 or len <= 1.
*/
void _nmod_poly_taylor_shift_convolution(mp_ptr p, mp_limb_t c, slong len, nmod_t mod)
{
    const slong n = len - 1;
    slong i;
    mp_limb_t fact, cpow;
    mp_ptr kernel, conv;

    if (c == 0 || len <= 1)
        return;

    kernel = _nmod_vec_init(len);
    conv = _nmod_vec_init(len);

    /* p[i] *= i!; on exit fact == n! */
    fact = 1;
    for (i = 2; i <= n; i++)
    {
        fact = n_mulmod2_preinv(fact, i, mod.n, mod.ninv);
        p[i] = n_mulmod2_preinv(p[i], fact, mod.n, mod.ninv);
    }

    _nmod_poly_reverse(p, p, len, len);

    /* kernel[i] = n!/i!, filled from the top down */
    kernel[n] = 1;
    for (i = n; i > 0; i--)
        kernel[i - 1] = n_mulmod2_preinv(kernel[i], i, mod.n, mod.ninv);

    if (c == mod.n - 1)
    {
        /* c == -1: powers of c only flip the sign of the odd entries */
        for (i = 1; i <= n; i += 2)
            kernel[i] = nmod_neg(kernel[i], mod);
    }
    else if (c != 1)
    {
        cpow = c;
        for (i = 1; i <= n; i++)
        {
            kernel[i] = n_mulmod2_preinv(kernel[i], cpow, mod.n, mod.ninv);
            cpow = n_mulmod2_preinv(cpow, c, mod.n, mod.ninv);
        }
    }

    _nmod_poly_mullow(conv, p, len, kernel, len, len, mod);

    /* fact = 1/(n!)^2; inside the loop it is multiplied up to 1/(n! i!) */
    fact = n_mulmod2_preinv(fact, fact, mod.n, mod.ninv);
    fact = n_invmod(fact, mod.n);

    for (i = n; i >= 0; i--)
    {
        p[i] = n_mulmod2_preinv(conv[n - i], fact, mod.n, mod.ninv);
        fact = n_mulmod2_preinv(fact, (i == 0) ? 1 : i, mod.n, mod.ninv);
    }

    _nmod_vec_clear(kernel);
    _nmod_vec_clear(conv);
}
/*
    Take dd modulo p_deg_k, then invert it.

    For pp->k == 1 the inverse is computed with n_invmod; otherwise the
    work is delegated to inv_mod_pk_3arg.
*/
static __inline__ mp_limb_t
invert_det_divisor_modulo_pk(mpz_t dd, p_k_pk_t const* pp, nmod_t const* mod)
{
    const mp_limb_t m = pp->p_deg_k;
    const mp_limb_t r = mpz_fdiv_ui(dd, m);

    if (pp->k != 1)
        return inv_mod_pk_3arg(r, *pp, *mod);

    return n_invmod(r, m);
}
/*
    Computes a^exp modulo n for a signed exponent, given the precomputed
    inverse ninv of n.

    For a negative exponent the base is first replaced by its modular
    inverse (n_invmod) and the magnitude of the exponent is used.  The
    magnitude is formed in unsigned arithmetic, because negating the most
    negative signed value is undefined behaviour in C.  The actual powering
    is delegated to n_powmod2_ui_preinv.
*/
mp_limb_t n_powmod2_preinv(mp_limb_t a, mp_limb_signed_t exp, mp_limb_t n, mp_limb_t ninv)
{
    mp_limb_t e;

    if (exp < WORD(0))
    {
        a = n_invmod(a, n);
        e = -(mp_limb_t) exp;
    }
    else
    {
        e = (mp_limb_t) exp;
    }

    return n_powmod2_ui_preinv(a, e, n, ninv);
}
/*
    Formal integral: x_int[k] = x[k-1] / k for 0 < k < len, and
    x_int[0] = 0.

    To save modular inversions, up to four consecutive denominators are
    inverted together: r = 1/(k (k-1) ...) is computed once and each
    coefficient is multiplied by r and by the product of the other
    denominators.  The PROD_TAKE4 / PROD_TAKE3 / PROD_TAKE2 thresholds
    decide how many denominators may be grouped for a given k
    (presumably so that the plain product does not overflow a limb --
    confirm against their definitions).

    Coefficients are written from the top down.
*/
void _nmod_poly_integral(mp_ptr x_int, mp_srcptr x, slong len, nmod_t mod)
{
    mp_limb_t r;
    slong k;

    for (k = len - 1; k > 0; )
    {
        if (k > 3 && k < PROD_TAKE4)
        {
            /* four coefficients per inversion */
            r = n_invmod(k*(k-1)*(k-2)*(k-3), mod.n);
            x_int[k]   = MUL3(x[k-1], r, (k-1)*(k-2)*(k-3));
            x_int[k-1] = MUL3(x[k-2], r, k*(k-2)*(k-3));
            x_int[k-2] = MUL3(x[k-3], r, k*(k-1)*(k-3));
            x_int[k-3] = MUL3(x[k-4], r, k*(k-1)*(k-2));
            k -= 4;
        }
        else if (k > 2 && k < PROD_TAKE3)
        {
            /* three coefficients per inversion */
            r = n_invmod(k*(k-1)*(k-2), mod.n);
            x_int[k]   = MUL3(x[k-1], r, (k-1)*(k-2));
            x_int[k-1] = MUL3(x[k-2], r, k*(k-2));
            x_int[k-2] = MUL3(x[k-3], r, k*(k-1));
            k -= 3;
        }
        else if (k > 1 && k < PROD_TAKE2)
        {
            /* two coefficients per inversion */
            r = n_invmod(k*(k-1), mod.n);
            x_int[k]   = MUL3(x[k-1], r, k-1);
            x_int[k-1] = MUL3(x[k-2], r, k);
            k -= 2;
        }
        else
        {
            /* single coefficient */
            r = n_invmod(k, mod.n);
            x_int[k] = n_mulmod2_preinv(x[k-1], r, mod.n, mod.ninv);
            k -= 1;
        }
    }

    x_int[0] = UWORD(0);
}
void _nmod_poly_divrem_basecase_3(mp_ptr Q, mp_ptr R, mp_ptr W, mp_srcptr A, slong lenA, mp_srcptr B, slong lenB, nmod_t mod) { const mp_limb_t invL = n_invmod(B[lenB - 1], mod.n); slong iR, i; mp_ptr B3 = W, R3 = W + 3*(lenB - 1), ptrQ = Q - lenB + 1; for (i = 0; i < lenB - 1; i++) { B3[3 * i] = B[i]; B3[3 * i + 1] = 0; B3[3 * i + 2] = 0; } for (i = 0; i < lenA; i++) { R3[3 * i] = A[i]; R3[3 * i + 1] = 0; R3[3 * i + 2] = 0; } for (iR = lenA - 1; iR >= lenB - 1; ) { mp_limb_t r = n_lll_mod_preinv(R3[3 * iR + 2], R3[3 * iR + 1], R3[3 * iR], mod.n, mod.ninv); while ((iR + 1 >= lenB) && (r == WORD(0))) { ptrQ[iR--] = WORD(0); if (iR + 1 >= lenB) r = n_lll_mod_preinv(R3[3 * iR + 2], R3[3 * iR + 1], R3[3 * iR], mod.n, mod.ninv); } if (iR + 1 >= lenB) { ptrQ[iR] = n_mulmod2_preinv(r, invL, mod.n, mod.ninv); if (lenB > 1) { const mp_limb_t c = n_negmod(ptrQ[iR], mod.n); mpn_addmul_1(R3 + 3 * (iR - lenB + 1), B3, 3 * lenB - 3, c); } iR--; } } for (iR = 0; iR < lenB - 1; iR++) R[iR] = n_lll_mod_preinv(R3[3 * iR + 2], R3[3 * iR + 1], R3[3 * iR], mod.n, mod.ninv); }
/*
    Computes the sieve offset soln1 for each of the s factor-base primes
    selected by A_ind and marks the matching soln2 entry with -1.

    For each such prime p, with p2 = p*p:
      - D is the two-limb value (hi, lo) reduced modulo p2;
      - B is reduced modulo p2, treating B as a signed quantity;
      - the offset is ((D - B^2) / p) * (B * A_modp[j] mod p)^(-1)
        + sieve_size/2, reduced modulo p with signed handling of the
        intermediate values.
*/
void qsieve_ll_compute_A_factor_offsets(qs_t qs_inf)
{
    const long s = qs_inf->s;
    mp_limb_t * A_ind = qs_inf->A_ind;
    mp_limb_t * A_modp = qs_inf->A_modp;
    mp_limb_t * soln1 = qs_inf->soln1;
    mp_limb_t * soln2 = qs_inf->soln2;
    mp_limb_t * inv_p2 = qs_inf->inv_p2;
    prime_t * factor_base = qs_inf->factor_base;
    const mp_limb_t hi = qs_inf->hi;
    const mp_limb_t lo = qs_inf->lo;
    const mp_limb_t B = qs_inf->B;
    mp_limb_t p, p2, pinv, index, D, B_modp2, inv, offset;
    long j;

    for (j = 0; j < s; j++)
    {
        index = A_ind[j];
        p = factor_base[index].p;
        p2 = p*p;
        pinv = factor_base[index].pinv;

        D = n_ll_mod_preinv(hi, lo, p2, inv_p2[j]);

        /* B mod p^2, with B interpreted as signed */
        if ((mp_limb_signed_t) B < 0)
        {
            B_modp2 = p2 - n_mod2_preinv(-B, p2, inv_p2[j]);
            if (B_modp2 == p2)
                B_modp2 = 0;
        }
        else
        {
            B_modp2 = n_mod2_preinv(B, p2, inv_p2[j]);
        }

        inv = n_invmod(n_mod2_preinv(B_modp2*A_modp[j], p, pinv), p);

        D -= (B_modp2*B_modp2);

        /* signed division of D by p */
        if ((mp_limb_signed_t) D < 0)
            offset = -((-D)/p); /* TODO consider using precomputed inverse */
        else
            offset = (D/p); /* TODO consider using precomputed inverse */

        offset *= inv;
        offset += qs_inf->sieve_size/2;

        /* reduce modulo p, again treating the value as signed */
        if ((mp_limb_signed_t) offset < 0)
        {
            offset = p - n_mod2_preinv(-offset, p, pinv);
            if (offset == p)
                offset = 0;
        }
        else
        {
            offset = n_mod2_preinv(offset, p, pinv);
        }

        soln1[index] = offset;
        soln2[index] = -1;
    }
}
/*
    In-place LU decomposition of A by classical Gaussian elimination.

    P           receives the row permutation, initialised to the identity
                and updated by hmod_mat_pivot.
    rank_check  when nonzero, the function returns 0 as soon as a column
                without a pivot is met.

    Returns the rank.  The elimination multipliers are stored in column
    rank - 1 of the rows below the pivot row.
*/
long hmod_mat_lu_classical(long * P, hmod_mat_t A, int rank_check)
{
    hlimb_t d, e, **a;
    nmod_t mod;
    long i, m, n, rank, length, row, col;

    m = A->r;
    n = A->c;
    a = A->rows;
    mod = A->mod;

    rank = 0;
    row = 0;

    for (i = 0; i < m; i++)
        P[i] = i;

    for (col = 0; row < m && col < n; col++)
    {
        if (hmod_mat_pivot(A, P, row, col) == 0)
        {
            if (rank_check)
                return 0;
            continue;   /* no pivot in this column, try the next one */
        }

        rank++;

        d = a[row][col];
        d = n_invmod(d, mod.n);
        length = n - col - 1;

        for (i = row + 1; i < m; i++)
        {
            e = n_mulmod2_preinv(a[i][col], d, mod.n, mod.ninv);

            if (length != 0)
                _hmod_vec_scalar_addmul_hmod(a[i] + col + 1,
                    a[row] + col + 1, length, nmod_neg(e, mod), mod);

            a[i][col] = 0;
            a[i][rank - 1] = e;
        }

        row++;
    }

    return rank;
}
/*
    Solves L X = B by forward substitution, one column of B at a time,
    where only the entries of L on and below the diagonal are read.

    unit  when nonzero, the diagonal of L is treated as all ones and is
          not read; otherwise the diagonal entries are inverted once up
          front.

    Each column is processed in a temporary vector that is first loaded
    from X and written back to X when finished.
*/
void nmod_mat_solve_tril_classical(nmod_mat_t X, const nmod_mat_t L,
                                   const nmod_mat_t B, int unit)
{
    int nlimbs;
    long row, col, n, m;
    nmod_t mod;
    mp_ptr inv, tmp;

    n = L->r;
    m = B->c;
    mod = L->mod;

    inv = NULL;
    if (!unit)
    {
        inv = _nmod_vec_init(n);
        for (row = 0; row < n; row++)
            inv[row] = n_invmod(nmod_mat_entry(L, row, row), mod.n);
    }

    nlimbs = _nmod_vec_dot_bound_limbs(n, mod);
    tmp = _nmod_vec_init(n);

    for (col = 0; col < m; col++)
    {
        for (row = 0; row < n; row++)
            tmp[row] = nmod_mat_entry(X, row, col);

        for (row = 0; row < n; row++)
        {
            /* dot product of row `row` of L with the entries solved so far */
            mp_limb_t acc = _nmod_vec_dot(L->rows[row], tmp, row, mod, nlimbs);

            acc = nmod_sub(nmod_mat_entry(B, row, col), acc, mod);
            if (!unit)
                acc = n_mulmod2_preinv(acc, inv[row], mod.n, mod.ninv);

            tmp[row] = acc;
        }

        for (row = 0; row < n; row++)
            nmod_mat_entry(X, row, col) = tmp[row];
    }

    _nmod_vec_clear(tmp);
    if (!unit)
        _nmod_vec_clear(inv);
}
/*
    For every factor-base prime p with index >= 2, computes
      A_inv[i]      = A^(-1) mod p,
      A_inv2B[j][i] = 2 * B_terms[j] * A^(-1) mod p   for 0 <= j < s,
      soln1[i]      = ((sqrts[i] - B) * A^(-1) + sieve_size/2) mod p,
      soln2[i]      = (soln1[i] - 2 * sqrts[i] * A^(-1)) mod p.
*/
void qsieve_ll_compute_off_adj(qs_t qs_inf)
{
    const long num_primes = qs_inf->num_primes;
    const long s = qs_inf->s;
    const mp_limb_t A = qs_inf->A;
    const mp_limb_t B = qs_inf->B;
    mp_limb_t * A_inv = qs_inf->A_inv;
    mp_limb_t ** A_inv2B = qs_inf->A_inv2B;
    mp_limb_t * B_terms = qs_inf->B_terms;
    mp_limb_t * soln1 = qs_inf->soln1;
    mp_limb_t * soln2 = qs_inf->soln2;
    int * sqrts = qs_inf->sqrts;
    prime_t * factor_base = qs_inf->factor_base;
    mp_limb_t p, pinv, t;
    long i, j;

    for (i = 2; i < num_primes; i++) /* skip k and 2 */
    {
        p = factor_base[i].p;
        pinv = factor_base[i].pinv;

        A_inv[i] = n_invmod(n_mod2_preinv(A, p, pinv), p);

        for (j = 0; j < s; j++)
        {
            t = n_mod2_preinv(B_terms[j], p, pinv);
            t = n_mulmod2_preinv(t, A_inv[i], p, pinv);

            /* double modulo p */
            t *= 2;
            if (t >= p)
                t -= p;

            A_inv2B[j][i] = t;
        }

        /* first root: (sqrt - B) / A, shifted by half the sieve size */
        t = n_mod2_preinv(B, p, pinv);
        t = sqrts[i] + p - t;
        t *= A_inv[i];
        t += qs_inf->sieve_size/2;
        soln1[i] = n_mod2_preinv(t, p, pinv);

        /* second root: the first one plus 2 * (-sqrt) / A */
        t = p - sqrts[i];
        if (t == p)
            t -= p;
        t = n_mulmod2_preinv(t, A_inv[i], p, pinv);
        t *= 2;
        if (t >= p)
            t -= p;

        soln2[i] = t + soln1[i];
        if (soln2[i] >= p)
            soln2[i] -= p;
    }
}
/*
    Initialises the CRT-based discrete log structure t for base a modulo
    mod, where n is factored into prime powers p^e.

    For each prime power mk = p^e of n:
      expo[k]       = n / mk,
      crt_coeffs[k] = expo[k] * (expo[k] mod mk)^(-1)   (product modulo n),
      pre[k]        is initialised by dlog_precomp_pe_init for the element
                    a^expo[k] modulo mod.

    Returns the sum of the cost fields of the per-factor precomputations.
*/
ulong dlog_crt_init(dlog_crt_t t, ulong a, ulong mod, ulong n, ulong num)
{
    int k;
    n_factor_t fac;
    ulong * M, * u;
    ulong cost = 0;

    n_factor_init(&fac);
    n_factor(&fac, n, 1);

    t->num = fac.num;
    nmod_init(&t->mod,mod);
    nmod_init(&t->n, n);

    t->expo = flint_malloc(t->num * sizeof(ulong));
    M = t->expo;
    t->crt_coeffs = flint_malloc(t->num * sizeof(ulong));
    u = t->crt_coeffs;
    t->pre = flint_malloc(t->num * sizeof(dlog_precomp_struct));

    for (k = 0; k < t->num; k++)
    {
        const ulong p = fac.p[k];
        const ulong e = fac.exp[k];
        ulong mk;

        /* check deliberately disabled by the leading `0 &&` */
        if (0 && mod % p == 0)
        {
            flint_printf("dlog_crt_init: modulus must be prime to order.\n");
            abort();
        }

        mk = n_pow(p, e);
        M[k] = n / mk;
        u[k] = nmod_mul(M[k], n_invmod(M[k] % mk, mk), t->n);

        /* depends on the power */
#if 0
        flint_printf("[sub-crt -- init for size %wu mod %wu]\n", mk, mod);
#endif
        dlog_precomp_pe_init(t->pre + k, nmod_pow_ui(a, M[k], t->mod), mod, p, e, mk, num);
        cost += t->pre[k].cost;
    }

#if 0
    if (cost > 500)
        flint_printf("[crt init for size %wu mod %wu -> cost %wu]\n", n,mod,cost);
#endif

    return cost;
}
/*
    Writes to res the first n coefficients of exp(coeff * x^power).

    Only the indices k * power with k <= r = (n - 1) / power are nonzero;
    entry k * power is coeff^k / k!.  A single inversion is used: the value
    returned by n_factorial_mod2_preinv for r is inverted once and the
    entries are filled from k = r downwards, multiplying the running
    inverse by k at every step.
*/
void _nmod_poly_exp_series_monomial_ui(mp_ptr res, mp_limb_t coeff, ulong power,
                                       slong n, nmod_t mod)
{
    slong k, r;
    mp_limb_t rfac;
    mp_limb_t a;

    r = (n - 1) / power;
    rfac = n_invmod(n_factorial_mod2_preinv(r, mod.n, mod.ninv), mod.n);

    /* For power == 1 every entry is overwritten below, so no zeroing is needed. */
    if (power > 1)
        _nmod_vec_zero(res, n);

    res[0] = UWORD(1);

    if (coeff != UWORD(1))
    {
        /* first pass: res[k * power] = coeff^k */
        a = coeff;
        for (k = power; k < n; k += power)
        {
            res[k] = a;
            a = n_mulmod2_preinv(a, coeff, mod.n, mod.ninv);
        }

        /* second pass: divide by k! */
        a = rfac;
        for (k = r; k >= 1; k--)
        {
            res[k * power] = n_mulmod2_preinv(res[k * power], a, mod.n, mod.ninv);
            a = n_mulmod2_preinv(a, k, mod.n, mod.ninv);
        }
    }
    else
    {
        /* coeff == 1: the entries are just the inverse factorials */
        a = rfac;
        for (k = r; k >= 1; k--)
        {
            res[k * power] = a;
            a = n_mulmod2_preinv(a, k, mod.n, mod.ninv);
        }
    }
}
/*
    Interpolates the polynomial of length n taking the values ys[i] at the
    points xs[i], using barycentric weights.

    P is the product of (x - xs[i]) over all points, w[i] is the inverse of
    the product of (xs[i] - xs[j]) over j != i, and the result is the sum
    over i of w[i] * ys[i] * P / (x - xs[i]).

    For n == 1 the result is the constant ys[0].
*/
void _nmod_poly_interpolate_nmod_vec_barycentric(mp_ptr poly, mp_srcptr xs,
                                                 mp_srcptr ys, slong n, nmod_t mod)
{
    mp_ptr P, Q, w;
    slong i, j;

    if (n == 1)
    {
        poly[0] = ys[0];
        return;
    }

    P = _nmod_vec_init(n + 1);
    Q = _nmod_vec_init(n);
    w = _nmod_vec_init(n);

    _nmod_poly_product_roots_nmod_vec(P, xs, n, mod);

    /* barycentric weights */
    for (i = 0; i < n; i++)
    {
        mp_limb_t prod = UWORD(1);

        w[i] = prod;
        for (j = 0; j < n; j++)
        {
            if (j == i)
                continue;
            prod = nmod_mul(prod, nmod_sub(xs[i], xs[j], mod), mod);
            w[i] = prod;
        }

        w[i] = n_invmod(w[i], mod.n);
    }

    _nmod_vec_zero(poly, n);

    for (i = 0; i < n; i++)
    {
        /* Q = P / (x - xs[i]) */
        _nmod_poly_div_root(Q, P, n + 1, xs[i], mod);
        _nmod_vec_scalar_addmul_nmod(poly, Q, n,
                                     nmod_mul(w[i], ys[i], mod), mod);
    }

    _nmod_vec_clear(P);
    _nmod_vec_clear(Q);
    _nmod_vec_clear(w);
}
/*
    Basecase polynomial division (quotient only) with one limb of
    accumulator per coefficient.

    Q      receives the A_len - B_len + 1 quotient coefficients.
    W      scratch space of at least A_len - B_len + 1 limbs, filled with
           the top coefficients of A.
    A, B   dividend and divisor coefficient arrays of lengths A_len, B_len.

    Each scratch entry is reduced modulo mod.n only when it is consumed.
*/
void _nmod_poly_div_basecase_1(mp_ptr Q, mp_ptr W, mp_srcptr A, long A_len,
                               mp_srcptr B, long B_len, nmod_t mod)
{
    const mp_limb_t lead_inv = n_invmod(B[B_len - 1], mod.n);
    mp_ptr R1 = W;
    mp_srcptr Btop = B + B_len - 1;     /* points at the leading coefficient of B */
    long coeff;

    mpn_copyi(R1, A + B_len - 1, A_len - B_len + 1);

    for (coeff = A_len - B_len; coeff >= 0; coeff--)
    {
        mp_limb_t c;
        long len;

        R1[coeff] = n_mod2_preinv(R1[coeff], mod.n, mod.ninv);

        if (R1[coeff] == 0L)
        {
            Q[coeff] = 0L;
            continue;
        }

        Q[coeff] = n_mulmod2_preinv(R1[coeff], lead_inv, mod.n, mod.ninv);
        c = n_negmod(Q[coeff], mod.n);

        /* Only the entries that still influence the quotient are updated. */
        len = FLINT_MIN(B_len - 1, coeff);
        if (len > 0)
            mpn_addmul_1(R1 + coeff - len, Btop - len, len, c);
    }
}
/*
    Writes n coefficients of cosh(h) to f.

    __nmod_poly_exp_series_prealloc fills f and g (presumably exp(h) and
    its inverse exp(-h) -- confirm against that helper); the two are then
    added and the sum is multiplied by the inverse of 2.
*/
void _nmod_poly_cosh_series(mp_ptr f, mp_srcptr h, long n, nmod_t mod)
{
    mp_ptr g = _nmod_vec_init(n);
    mp_ptr T = _nmod_vec_init(n);
    mp_ptr U = _nmod_vec_init(n);
    mp_ptr hprime = _nmod_vec_init(n);
    mp_limb_t half;

    /* The derivative only fills n - 1 entries; clear the last one. */
    _nmod_poly_derivative(hprime, h, n, mod);
    hprime[n-1] = 0UL;

    __nmod_poly_exp_series_prealloc(f, g, h, hprime, T, U, n, mod, 1);

    _nmod_vec_add(f, f, g, n, mod);

    half = n_invmod(2UL, mod.n);
    _nmod_vec_scalar_mul_nmod(f, f, n, half, mod);

    _nmod_vec_free(hprime);
    _nmod_vec_free(g);
    _nmod_vec_free(T);
    _nmod_vec_free(U);
}
/*
    Basecase polynomial remainder using three limbs of accumulator per
    coefficient.

    R      receives the lenB - 1 remainder coefficients (untouched when
           lenB <= 1).
    W      scratch space; the first 3*(lenB - 1) limbs hold a three-limb
           widened copy of B without its leading coefficient, followed by
           3*lenA limbs holding the widened copy of A.
    A, B   dividend (length lenA) and divisor (length lenB).
*/
void _nmod_poly_rem_basecase_3(mp_ptr R, mp_ptr W, mp_srcptr A, long lenA,
                               mp_srcptr B, long lenB, nmod_t mod)
{
    mp_limb_t invL;
    mp_ptr B3, R3;
    long iR, i;

    if (lenB <= 1)
        return;

    invL = n_invmod(B[lenB - 1], mod.n);
    B3 = W;
    R3 = W + 3*(lenB - 1);

    for (i = 0; i < lenB - 1; i++)
    {
        B3[3 * i] = B[i];
        B3[3 * i + 1] = 0;
        B3[3 * i + 2] = 0;
    }

    for (i = 0; i < lenA; i++)
    {
        R3[3 * i] = A[i];
        R3[3 * i + 1] = 0;
        R3[3 * i + 2] = 0;
    }

    for (iR = lenA - 1; iR >= lenB - 1; iR--)
    {
        const mp_limb_t r = n_lll_mod_preinv(R3[3*iR + 2], R3[3*iR + 1],
                                R3[3*iR], mod.n, mod.ninv);
        mp_limb_t q, c;

        if (r == 0)
            continue;

        q = n_mulmod2_preinv(r, invL, mod.n, mod.ninv);
        c = n_negmod(q, mod.n);

        /* Subtract q * B (without its leading term) from the running remainder. */
        mpn_addmul_1(R3 + 3 * (iR - lenB + 1), B3, 3 * lenB - 3, c);
    }

    for (iR = 0; iR < lenB - 1; iR++)
        R[iR] = n_lll_mod_preinv(R3[3 * iR + 2], R3[3 * iR + 1],
                                 R3[3 * iR], mod.n, mod.ninv);
}
/*
    Sets B to the inverse of the square matrix A.

    Returns 1 on success and 0 when A is found to be singular.
      - dimension 0: nothing to do, returns 1;
      - dimension 1: the single entry is inverted with n_invmod, or 0 is
        returned when the entry is zero;
      - otherwise: solves A * B = I with nmod_mat_solve and returns its
        result.
*/
int nmod_mat_inv(nmod_mat_t B, const nmod_mat_t A)
{
    const long dim = A->r;
    nmod_mat_t I;
    long i;
    int result;

    if (dim == 0)
        return 1;

    if (dim == 1)
    {
        if (nmod_mat_entry(A, 0, 0) == 0UL)
            return 0;

        nmod_mat_entry(B, 0, 0) = n_invmod(nmod_mat_entry(A, 0, 0), B->mod.n);
        return 1;
    }

    /* general case: build the identity and solve A * B = I */
    nmod_mat_init(I, dim, dim, B->mod.n);
    for (i = 0; i < dim; i++)
        nmod_mat_entry(I, i, i) = 1UL;

    result = nmod_mat_solve(B, A, I);

    nmod_mat_clear(I);

    return result;
}
/*
    Division with remainder for the case where the quotient has two
    coefficients (or, when lenB == 1, plain division by the constant B[0]).

    lenB == 1: Q = A / B[0], lenA coefficients, R is not touched.
    otherwise: Q[1] and Q[0] are computed directly from the two leading
               coefficients of A and B, then R = A - Q * B on the low
               lenB - 1 coefficients.

    When the modulus is small enough (FLINT_BITS + 2 <= 2 * mod.norm) the
    product Q * B is accumulated with mpn routines and reduced once;
    otherwise modular vector operations are used.
*/
void _nmod_poly_divrem_q1(mp_ptr Q, mp_ptr R, mp_srcptr A, long lenA,
                          mp_srcptr B, long lenB, nmod_t mod)
{
    /* Skip the inversion for a monic divisor. */
    const mp_limb_t invL = (B[lenB-1] == 1) ? 1 : n_invmod(B[lenB-1], mod.n);
    mp_limb_t t;

    if (lenB == 1)
    {
        _nmod_vec_scalar_mul_nmod(Q, A, lenA, invL, mod);
        return;
    }

    Q[1] = n_mulmod2_preinv(A[lenA-1], invL, mod.n, mod.ninv);

    t = n_mulmod2_preinv(Q[1], B[lenB-2], mod.n, mod.ninv);
    t = n_submod(A[lenA-2], t, mod.n);
    Q[0] = n_mulmod2_preinv(t, invL, mod.n, mod.ninv);

    if (FLINT_BITS + 2 <= 2 * mod.norm)
    {
        /* delayed reduction: accumulate first, reduce once */
        mpn_mul_1(R, B, lenB - 1, Q[0]);
        if (lenB > 2)
            mpn_addmul_1(R + 1, B, lenB - 2, Q[1]);
        _nmod_vec_reduce(R, R, lenB - 1, mod);
    }
    else
    {
        _nmod_vec_scalar_mul_nmod(R, B, lenB - 1, Q[0], mod);
        if (lenB > 2)
            _nmod_vec_scalar_addmul_nmod(R + 1, B, lenB - 2, Q[1], mod);
    }

    _nmod_vec_sub(R, A, R, lenB - 1, mod);
}
/*
    Extended GCD: sets G to the monic GCD of A and B and S, T to cofactors
    with S*A + T*B = G, using _nmod_poly_xgcd_hgcd for the general case.

    If A is shorter than B the function calls itself with the roles of
    (A, S) and (B, T) exchanged, so below lenA >= lenB always holds.

    Special cases:
      lenA == 0 (hence lenB == 0): G = S = T = 0;
      lenB == 0: G = A made monic, S = inverse of lead(A), T = 0;
      lenB == 1: G = 1, S = 0, T = inverse of B[0].

    Outputs may alias the inputs: when an output is the same object as A
    or B the result is built in a fresh buffer that replaces the output's
    coefficient array after the computation.
*/
void nmod_poly_xgcd_hgcd(nmod_poly_t G, nmod_poly_t S, nmod_poly_t T,
                         const nmod_poly_t A, const nmod_poly_t B)
{
    if (A->length < B->length)
    {
        nmod_poly_xgcd_hgcd(G, T, S, B, A);
    }
    else  /* lenA >= lenB >= 0 */
    {
        const slong lenA = A->length, lenB = B->length;
        mp_limb_t inv;

        if (lenA == 0)  /* lenA = lenB = 0 */
        {
            nmod_poly_zero(G);
            nmod_poly_zero(S);
            nmod_poly_zero(T);
        }
        else if (lenB == 0)  /* lenA > lenB = 0 */
        {
            inv = n_invmod(A->coeffs[lenA - 1], A->mod.n);
            nmod_poly_scalar_mul_nmod(G, A, inv);
            nmod_poly_zero(T);
            nmod_poly_set_coeff_ui(S, 0, inv);
            S->length = 1;
        }
        else if (lenB == 1)  /* lenA >= lenB = 1 */
        {
            nmod_poly_fit_length(T, 1);
            T->length = 1;
            T->coeffs[0] = n_invmod(B->coeffs[0], A->mod.n);
            nmod_poly_one(G);
            nmod_poly_zero(S);
        }
        else  /* lenA >= lenB >= 2 */
        {
            mp_ptr g, s, t;
            slong lenG;

            if (G == A || G == B)
            {
                g = _nmod_vec_init(FLINT_MIN(lenA, lenB));
            }
            else
            {
                nmod_poly_fit_length(G, FLINT_MIN(lenA, lenB));
                g = G->coeffs;
            }
            if (S == A || S == B)
            {
                s = _nmod_vec_init(FLINT_MAX(lenB - 1, 2));
            }
            else
            {
                nmod_poly_fit_length(S, FLINT_MAX(lenB - 1, 2));
                s = S->coeffs;
            }
            if (T == A || T == B)
            {
                t = _nmod_vec_init(FLINT_MAX(lenA - 1, 2));
            }
            else
            {
                nmod_poly_fit_length(T, FLINT_MAX(lenA - 1, 2));
                t = T->coeffs;
            }

            /* lenA >= lenB is guaranteed by the enclosing branch, so no
               swapped call is needed here. */
            lenG = _nmod_poly_xgcd_hgcd(g, s, t, A->coeffs, lenA,
                                                 B->coeffs, lenB, A->mod);

            /* Install the temporary buffers into aliased outputs. */
            if (G == A || G == B)
            {
                flint_free(G->coeffs);
                G->coeffs = g;
                G->alloc  = FLINT_MIN(lenA, lenB);
            }
            if (S == A || S == B)
            {
                flint_free(S->coeffs);
                S->coeffs = s;
                S->alloc  = FLINT_MAX(lenB - 1, 2);
            }
            if (T == A || T == B)
            {
                flint_free(T->coeffs);
                T->coeffs = t;
                T->alloc  = FLINT_MAX(lenA - 1, 2);
            }

            G->length = lenG;
            S->length = FLINT_MAX(lenB - lenG, 1);
            T->length = FLINT_MAX(lenA - lenG, 1);
            MPN_NORM(S->coeffs, S->length);
            MPN_NORM(T->coeffs, T->length);

            /* Make G monic and scale the cofactors accordingly. */
            if (G->coeffs[lenG - 1] != 1)
            {
                inv = n_invmod(G->coeffs[lenG - 1], A->mod.n);
                nmod_poly_scalar_mul_nmod(G, G, inv);
                nmod_poly_scalar_mul_nmod(S, S, inv);
                nmod_poly_scalar_mul_nmod(T, T, inv);
            }
        }
    }
}
/*
    Extended GCD: sets G to the monic GCD of A and B and S, T to cofactors
    with S*A + T*B = G, using _nmod_poly_xgcd for the general case.

    Special cases:
      both inputs zero: G = S = T = 0;
      A zero:  G = B made monic, S = 0, T = inverse of lead(B);
      B zero:  G = A made monic, T = 0, S = inverse of lead(A).

    Outputs may alias the inputs: when an output is the same object as A
    or B the result is built in a temporary polynomial that is swapped
    into the output after the computation.

    NOTE(review): the cofactor lengths are set to lenB - lenG and
    lenA - lenG, which can be 0 (for example when lenG == lenB), and S is
    given room for only lenB - 1 coefficients; confirm that this matches
    what _nmod_poly_xgcd writes.
*/
void nmod_poly_xgcd(nmod_poly_t G, nmod_poly_t S, nmod_poly_t T,
                    const nmod_poly_t A, const nmod_poly_t B)
{
    const long lenA = A->length, lenB = B->length;
    mp_limb_t inv;
    nmod_poly_t tG, tS, tT;
    mp_ptr g, s, t;
    long lenG;
    int G_aliased, S_aliased, T_aliased;

    if (lenA == 0 && lenB == 0)
    {
        nmod_poly_zero(G);
        nmod_poly_zero(S);
        nmod_poly_zero(T);
        return;
    }

    if (lenA == 0)
    {
        inv = n_invmod(B->coeffs[lenB - 1], B->mod.n);
        nmod_poly_scalar_mul_nmod(G, B, inv);
        nmod_poly_zero(S);
        nmod_poly_set_coeff_ui(T, 0, inv);
        T->length = 1;
        return;
    }

    if (lenB == 0)
    {
        inv = n_invmod(A->coeffs[lenA - 1], A->mod.n);
        nmod_poly_scalar_mul_nmod(G, A, inv);
        nmod_poly_zero(T);
        nmod_poly_set_coeff_ui(S, 0, inv);
        S->length = 1;
        return;
    }

    /* Both inputs are nonzero from here on. */
    G_aliased = (G == A || G == B);
    S_aliased = (S == A || S == B);
    T_aliased = (T == A || T == B);

    if (G_aliased)
    {
        nmod_poly_init2(tG, A->mod.n, FLINT_MIN(lenA, lenB));
        g = tG->coeffs;
    }
    else
    {
        nmod_poly_fit_length(G, FLINT_MIN(lenA, lenB));
        g = G->coeffs;
    }

    if (S_aliased)
    {
        nmod_poly_init2(tS, A->mod.n, lenB - 1);
        s = tS->coeffs;
    }
    else
    {
        nmod_poly_fit_length(S, lenB - 1);
        s = S->coeffs;
    }

    if (T_aliased)
    {
        nmod_poly_init2(tT, A->mod.n, lenA - 1);
        t = tT->coeffs;
    }
    else
    {
        nmod_poly_fit_length(T, lenA - 1);
        t = T->coeffs;
    }

    /* The underscore function is always called with the longer operand first. */
    if (lenA >= lenB)
        lenG = _nmod_poly_xgcd(g, s, t, A->coeffs, lenA, B->coeffs, lenB, A->mod);
    else
        lenG = _nmod_poly_xgcd(g, t, s, B->coeffs, lenB, A->coeffs, lenA, A->mod);

    if (G_aliased)
    {
        nmod_poly_swap(tG, G);
        nmod_poly_clear(tG);
    }
    if (S_aliased)
    {
        nmod_poly_swap(tS, S);
        nmod_poly_clear(tS);
    }
    if (T_aliased)
    {
        nmod_poly_swap(tT, T);
        nmod_poly_clear(tT);
    }

    G->length = lenG;
    S->length = lenB - lenG;
    T->length = lenA - lenG;
    MPN_NORM(S->coeffs, S->length);
    MPN_NORM(T->coeffs, T->length);

    /* Make G monic and scale the cofactors accordingly. */
    if (G->coeffs[lenG - 1] != 1)
    {
        inv = n_invmod(G->coeffs[lenG - 1], A->mod.n);
        nmod_poly_scalar_mul_nmod(G, G, inv);
        nmod_poly_scalar_mul_nmod(S, S, inv);
        nmod_poly_scalar_mul_nmod(T, T, inv);
    }
}
int main() { flint_rand_t state; slong nmax, n, bound, count; mp_limb_t p, pinv, m1, m2; nmod_poly_t A; flint_printf("rev...."); fflush(stdout); flint_randinit(state); bound = 100000; p = n_nextprime(UWORD(1) << (FLINT_BITS - 1), 0); pinv = n_preinvert_limb(p); nmod_poly_init(A, p); nmod_poly_set_coeff_ui(A, 1, 1); nmod_poly_exp_series(A, A, bound); nmod_poly_shift_right(A, A, 1); nmod_poly_inv_series(A, A, bound); m1 = 1; for (n = 0; n < A->length; n++) { A->coeffs[n] = n_mulmod2_preinv(A->coeffs[n], m1, p, pinv); m1 = n_mulmod2_preinv(m1, n + 1, p, pinv); } for (nmax = 0; nmax < bound; nmax = 1.5 * nmax + 2) { fmpz_t numer, denom; bernoulli_rev_t iter; fmpz_init(numer); fmpz_init(denom); nmax += (nmax % 2); bernoulli_rev_init(iter, nmax); if (nmax < 8000) count = 4000; else count = 100; /* flint_printf("nmax = %wd, count = %wd\n", nmax, count); */ for (n = nmax; n >= 0 && count > 0; n -= 2, count--) { bernoulli_rev_next(numer, denom, iter); m1 = fmpz_fdiv_ui(numer, p); m2 = fmpz_fdiv_ui(denom, p); m2 = n_invmod(m2, p); m1 = n_mulmod2_preinv(m1, m2, p, pinv); m2 = nmod_poly_get_coeff_ui(A, n); if (m1 != m2) { flint_printf("FAIL:\n"); flint_printf("nmax = %wd, n = %wd\n", nmax, n); flint_printf("m1 = %wu mod %wu\n", m1, p); flint_printf("m2 = %wu mod %wu\n", m2, p); abort(); } } bernoulli_rev_clear(iter); fmpz_clear(numer); fmpz_clear(denom); } flint_randclear(state); flint_cleanup(); flint_printf("PASS\n"); return EXIT_SUCCESS; }