/*
    Fibonacci probable prime test for a single limb n.

    Returns 0 for n = 0, 1 and 1 for n = 2, 3.  For larger n it obtains the
    pair V from the chain helper at index m = (n - (5/n)) / 2 and returns 1
    exactly when (n - 3) * V.x == 2 * V.y modulo n.  The double-precision
    helpers are used when n fits in FLINT_D_BITS bits, the full-limb helpers
    otherwise.

    The small-value guard compares n as an unsigned limb.  Testing the
    absolute value of the signed reinterpretation instead would make the
    three largest limb values (-1, -2, -3 when read as signed) look like
    1, 2, 3 and report them as probable primes without running the test.
*/
int n_is_probabprime_fibonacci(mp_limb_t n)
{
    mp_limb_t m;
    n_pair_t V;

    if (n <= 3UL)
        return (n >= 2UL);

    m = (n - n_jacobi(5L, n)) / 2;  /* cannot overflow as (5/n) = 0 for n = 2^64-1 */

    if (FLINT_BIT_COUNT(n) <= FLINT_D_BITS)
    {
        double npre = n_precompute_inverse(n);

        V = fchain_precomp(m, n, npre);
        return (n_mulmod_precomp(n - 3UL, V.x, n, npre)
                == n_mulmod_precomp(2UL, V.y, n, npre));
    }
    else
    {
        mp_limb_t ninv = n_preinvert_limb(n);

        V = fchain2_preinv(m, n, ninv);
        return (n_mulmod2_preinv(n - 3UL, V.x, n, ninv)
                == n_mulmod2_preinv(2UL, V.y, n, ninv));
    }
}
/*
    Schoolbook division with remainder of A (length lenA) by B (length lenB)
    where every coefficient of the working remainder is kept in one limb.

    A is copied into W (lenA limbs are used).  Walking from the top
    coefficient down to index lenB - 1, the quotient coefficient for
    position i is stored in Q[i - (lenB - 1)] and, when lenB > 1, the
    matching multiple of the low lenB - 1 coefficients of B is added into
    the working remainder.  Finally the low lenB - 1 limbs are reduced
    into R.

    NOTE(review): the additions are done with mpn_addmul_1 without any
    intermediate reduction; presumably the caller only selects this variant
    when the sums cannot exceed one limb - confirm.
*/
void _nmod_poly_divrem_basecase_1(mp_ptr Q, mp_ptr R, mp_ptr W,
                                  mp_srcptr A, slong lenA,
                                  mp_srcptr B, slong lenB, nmod_t mod)
{
    const mp_limb_t lead_inv = n_invmod(B[lenB - 1], mod.n);
    const slong tail = lenB - 1;    /* number of non-leading coefficients of B */
    mp_ptr rem = W;
    slong pos;

    flint_mpn_copyi(rem, A, lenA);

    for (pos = lenA - 1; pos >= tail; pos--)
    {
        mp_limb_t q;

        if (rem[pos] == 0)
        {
            Q[pos - tail] = WORD(0);
            continue;
        }

        q = n_mulmod2_preinv(rem[pos], lead_inv, mod.n, mod.ninv);
        Q[pos - tail] = q;

        if (tail > 0)
        {
            /* adding (-q) * B cancels the current leading term */
            const mp_limb_t neg_q = n_negmod(q, mod.n);

            mpn_addmul_1(rem + (pos - tail), B, tail, neg_q);
        }
    }

    if (tail > 0)
        _nmod_vec_reduce(R, rem, tail, mod);
}
/*
    Schoolbook quotient of A (length A_len) by B (length B_len) using two
    limbs per working coefficient.

    Layout of W: the low B_len - 1 coefficients of B, each widened to two
    limbs, followed by the top A_len - B_len + 1 coefficients of A widened
    the same way.  Coefficients are processed from the top down; each one is
    first reduced from its two limbs, a zero gives a zero quotient
    coefficient, anything else is multiplied by the inverse of the leading
    coefficient of B and the corresponding multiple of B is added into the
    lower working coefficients.  Only Q is produced.
*/
void _nmod_poly_div_basecase_2(mp_ptr Q, mp_ptr W,
                               mp_srcptr A, long A_len,
                               mp_srcptr B, long B_len, nmod_t mod)
{
    const mp_limb_t lead_inv = n_invmod(B[B_len - 1], mod.n);
    mp_ptr wideB = W;
    mp_ptr wideR = W + 2 * (B_len - 1);
    mp_srcptr Btop = wideB + 2 * (B_len - 1);
    long k, pos, span;

    for (k = 0; k < B_len - 1; k++)
    {
        wideB[2 * k] = B[k];
        wideB[2 * k + 1] = 0;
    }

    for (k = 0; k < A_len - B_len + 1; k++)
    {
        wideR[2 * k] = A[B_len + k - 1];
        wideR[2 * k + 1] = 0;
    }

    for (pos = A_len - B_len; pos >= 0; pos--)
    {
        mp_limb_t q, neg_q;
        const mp_limb_t r = n_ll_mod_preinv(wideR[2 * pos + 1], wideR[2 * pos],
                                            mod.n, mod.ninv);

        if (r == 0L)
        {
            Q[pos] = 0L;
            continue;
        }

        q = n_mulmod2_preinv(r, lead_inv, mod.n, mod.ninv);
        Q[pos] = q;
        neg_q = n_negmod(q, mod.n);

        /* only the coefficients that still influence the quotient are updated */
        span = FLINT_MIN(B_len - 1, pos);
        if (span > 0)
            mpn_addmul_1(wideR + 2 * (pos - span), Btop - 2 * span, 2 * span, neg_q);
    }
}
/*
    Evaluates the polynomial (poly, len) at c modulo mod.n by Horner's rule.

    Returns 0 for an empty polynomial and poly[0] when the polynomial is
    constant or c is zero.
*/
mp_limb_t _nmod_poly_evaluate_nmod(mp_srcptr poly, slong len, mp_limb_t c, nmod_t mod)
{
    mp_limb_t acc;
    slong k;

    if (len == 0)
        return 0;

    if (len == 1 || c == 0)
        return poly[0];

    acc = poly[len - 1];
    for (k = len - 2; k >= 0; k--)
    {
        acc = n_mulmod2_preinv(acc, c, mod.n, mod.ninv);
        acc = n_addmod(acc, poly[k], mod.n);
    }

    return acc;
}
/*
    Miller-Rabin test to base a; returns a positive value iff the test
    passes.  d is the starting exponent handed in by the caller.

    Variant modified by Denis Kryskov so that n - 1 is computed only once.
*/
static __inline__ int n_is_strong_probabprime2_preinv_speedup(mp_limb_t n, mp_limb_t ninv, mp_limb_t a, mp_limb_t d)
{
    const mp_limb_t n_minus_1 = n - 1;
    mp_limb_t e;
    mp_limb_t y = n_powmod2_ui_preinv(a, d, n, ninv);

    if (y == UWORD(1))
        return 1;

    /* keep squaring until the exponent reaches n - 1 or y hits n - 1 */
    for (e = d << 1; e != n_minus_1 && y != n_minus_1; e <<= 1)
        y = n_mulmod2_preinv(y, y, n, ninv);

    return y == n_minus_1;
}
/*
    Fills qs_inf->A_modp, qs_inf->B_terms and qs_inf->B from the current
    value of qs_inf->A.

    For each of the s factor-base indices listed in A_ind, with prime p:
      - A_modp[i] receives (A / p) mod p;
      - that residue is inverted mod p, multiplied by sqrts[A_ind[i]] mod p
        and replaced by p minus itself when it exceeds p / 2;
      - B_terms[i] receives (A / p) times that value.
    B is the plain sum of all B_terms.
*/
void qsieve_ll_compute_B_terms(qs_t qs_inf)
{
    const long s = qs_inf->s;
    const mp_limb_t A = qs_inf->A;
    mp_limb_t * A_ind = qs_inf->A_ind;
    mp_limb_t * A_modp = qs_inf->A_modp;
    mp_limb_t * B_terms = qs_inf->B_terms;
    prime_t * factor_base = qs_inf->factor_base;
    mp_limb_t total;
    long i;

    for (i = 0; i < s; i++)
    {
        const mp_limb_t p = factor_base[A_ind[i]].p;
        const mp_limb_t pinv = factor_base[A_ind[i]].pinv;
        const mp_limb_t cofactor = A / p;   /* TODO: possibly use precomputed inverse here */
        mp_limb_t t;

        t = n_mod2_preinv(cofactor, p, pinv);
        A_modp[i] = t;

        t = n_invmod(t, p);
        t = n_mulmod2_preinv(t, qs_inf->sqrts[A_ind[i]], p, pinv);
        if (t > p/2)
            t = p - t;

        B_terms[i] = cofactor * t;
    }

    total = B_terms[0];
    i = 1;
    while (i < s)
        total += B_terms[i++];

    qs_inf->B = total;
}
/*
    Schoolbook remainder of A (length lenA) by B (length lenB), one limb per
    working coefficient.  Does nothing when lenB <= 1.

    A is copied into W (lenA limbs are used); for every nonzero working
    coefficient at index >= lenB - 1, taken from the top down, the matching
    multiple of the low lenB - 1 coefficients of B is added in.  The low
    lenB - 1 limbs are then reduced into R.
*/
void _nmod_poly_rem_basecase_1(mp_ptr R, mp_ptr W,
                               mp_srcptr A, long lenA,
                               mp_srcptr B, long lenB, nmod_t mod)
{
    const long tail = lenB - 1;
    mp_limb_t lead_inv;
    mp_ptr rem = W;
    long pos;

    if (tail <= 0)
        return;

    lead_inv = n_invmod(B[tail], mod.n);
    mpn_copyi(rem, A, lenA);

    for (pos = lenA - 1; pos >= tail; pos--)
    {
        mp_limb_t q, neg_q;

        if (rem[pos] == 0)
            continue;

        q = n_mulmod2_preinv(rem[pos], lead_inv, mod.n, mod.ninv);
        neg_q = n_negmod(q, mod.n);
        mpn_addmul_1(rem + (pos - tail), B, tail, neg_q);
    }

    _nmod_vec_reduce(R, rem, tail, mod);
}
/*
    Determinant of A modulo A->mod.n, computed in place: A is overwritten by
    the LU decomposition.

    nmod_mat_lu is called with rank checking enabled; when the returned rank
    is below the dimension the result stays 0, otherwise it is the product
    of the diagonal entries.  The sign is flipped when the row permutation
    has parity 1.
*/
mp_limb_t _nmod_mat_det(nmod_mat_t A)
{
    const long dim = A->r;
    long * perm = flint_malloc(sizeof(long) * dim);
    mp_limb_t result = 0UL;
    long k;

    if (nmod_mat_lu(perm, A, 1) == dim)
    {
        result = 1UL;
        for (k = 0; k < dim; k++)
            result = n_mulmod2_preinv(result, nmod_mat_entry(A, k, k),
                                      A->mod.n, A->mod.ninv);
    }

    if (_perm_parity(perm, dim) == 1)
        result = nmod_neg(result, A->mod);

    flint_free(perm);
    return result;
}
/*
    Sets B to c * A.

    The scalars 0, 1 and mod.n - 1 are dispatched to zero / copy / negate;
    every other scalar multiplies each entry of A individually.
*/
void nmod_mat_scalar_mul(nmod_mat_t B, const nmod_mat_t A, mp_limb_t c)
{
    long row, col;

    if (c == 0UL)
    {
        nmod_mat_zero(B);
        return;
    }

    if (c == 1UL)
    {
        nmod_mat_set(B, A);
        return;
    }

    if (c == A->mod.n - 1UL)
    {
        nmod_mat_neg(B, A);
        return;
    }

    for (row = 0; row < A->r; row++)
    {
        for (col = 0; col < A->c; col++)
        {
            nmod_mat_entry(B, row, col) = n_mulmod2_preinv(
                nmod_mat_entry(A, row, col), c, A->mod.n, A->mod.ninv);
        }
    }
}
/*
    For every factor-base prime p from index 2 upwards, fills
      - A_inv[i]      with the inverse of A modulo p,
      - A_inv2B[j][i] with 2 * B_terms[j] * A_inv[i] modulo p for each j < s,
      - soln1[i]      with ((sqrts[i] + p - (B mod p)) * A_inv[i]
                            + sieve_size / 2) modulo p,
      - soln2[i]      with soln1[i] + 2 * (p - sqrts[i]) * A_inv[i] modulo p.

    NOTE(review): the soln1 product is formed with a plain limb
    multiplication before reduction, so it presumably relies on p being
    small enough for that not to wrap - confirm.
*/
void qsieve_ll_compute_off_adj(qs_t qs_inf)
{
    const long num_primes = qs_inf->num_primes;
    const long s = qs_inf->s;
    const mp_limb_t A = qs_inf->A;
    const mp_limb_t B = qs_inf->B;
    mp_limb_t * A_inv = qs_inf->A_inv;
    mp_limb_t ** A_inv2B = qs_inf->A_inv2B;
    mp_limb_t * B_terms = qs_inf->B_terms;
    mp_limb_t * soln1 = qs_inf->soln1;
    mp_limb_t * soln2 = qs_inf->soln2;
    int * sqrts = qs_inf->sqrts;
    prime_t * factor_base = qs_inf->factor_base;
    long i, j;

    for (i = 2; i < num_primes; i++) /* skip k and 2 */
    {
        const mp_limb_t p = factor_base[i].p;
        const mp_limb_t pinv = factor_base[i].pinv;
        mp_limb_t acc;

        A_inv[i] = n_invmod(n_mod2_preinv(A, p, pinv), p);

        for (j = 0; j < s; j++)
        {
            acc = n_mod2_preinv(B_terms[j], p, pinv);
            acc = n_mulmod2_preinv(acc, A_inv[i], p, pinv);
            acc *= 2;
            if (acc >= p)
                acc -= p;
            A_inv2B[j][i] = acc;
        }

        /* first root, shifted by half the sieve length */
        acc = n_mod2_preinv(B, p, pinv);
        acc = sqrts[i] + p - acc;
        acc *= A_inv[i];
        acc += qs_inf->sieve_size/2;
        soln1[i] = n_mod2_preinv(acc, p, pinv);

        /* second root is obtained from the first by adding 2 * (-sqrt) / A */
        acc = p - sqrts[i];
        if (acc == p)
            acc -= p;
        acc = n_mulmod2_preinv(acc, A_inv[i], p, pinv);
        acc *= 2;
        if (acc >= p)
            acc -= p;

        soln2[i] = acc + soln1[i];
        if (soln2[i] >= p)
            soln2[i] -= p;
    }
}
/*
    In-place Taylor shift of the polynomial (p, len) by c, carried out with
    a single truncated product.  Nothing is done when c is zero or
    len <= 1.

    Steps, with n = len - 1:
      1. scale p[i] by i! and reverse the coefficients;
      2. build t[i] = n! / i!, then multiply t[i] by c^i (sign flips only
         when c == mod.n - 1, nothing when c == 1);
      3. u = low len coefficients of p * t;
      4. write p[i] = u[n - i] * f, where f starts as the inverse of (n!)^2
         and is multiplied by i after each step going down.

    NOTE(review): step 4 inverts (n!)^2 modulo mod.n, so this presumably
    requires n! to be invertible - confirm against callers.
*/
void _nmod_poly_taylor_shift_convolution(mp_ptr p, mp_limb_t c, slong len, nmod_t mod)
{
    const slong n = len - 1;
    mp_limb_t fact, cpow;
    mp_ptr t, u;
    slong i;

    if (c == 0 || len <= 1)
        return;

    t = _nmod_vec_init(len);
    u = _nmod_vec_init(len);

    fact = 1;
    for (i = 2; i <= n; i++)
    {
        fact = n_mulmod2_preinv(fact, i, mod.n, mod.ninv);
        p[i] = n_mulmod2_preinv(p[i], fact, mod.n, mod.ninv);
    }

    _nmod_poly_reverse(p, p, len, len);

    t[n] = 1;
    i = n;
    while (i > 0)
    {
        t[i - 1] = n_mulmod2_preinv(t[i], i, mod.n, mod.ninv);
        i--;
    }

    if (c == mod.n - 1)
    {
        /* powers of -1: negate the odd positions only */
        for (i = 1; i <= n; i += 2)
            t[i] = nmod_neg(t[i], mod);
    }
    else if (c != 1)
    {
        cpow = c;
        for (i = 1; i <= n; i++)
        {
            t[i] = n_mulmod2_preinv(t[i], cpow, mod.n, mod.ninv);
            cpow = n_mulmod2_preinv(cpow, c, mod.n, mod.ninv);
        }
    }

    _nmod_poly_mullow(u, p, len, t, len, len, mod);

    fact = n_mulmod2_preinv(fact, fact, mod.n, mod.ninv);
    fact = n_invmod(fact, mod.n);

    for (i = n; i >= 0; i--)
    {
        p[i] = n_mulmod2_preinv(u[n - i], fact, mod.n, mod.ninv);
        fact = n_mulmod2_preinv(fact, (i == 0) ? 1 : i, mod.n, mod.ninv);
    }

    _nmod_vec_clear(t);
    _nmod_vec_clear(u);
}
/*
    Sets rop2 to the sum over i of op[i] * t[i] modulo ctx->mod.n, where the
    weights t[0..d-1] (d = degree of the context) are built from the sparse
    modulus description (ctx->a, ctx->j, ctx->len):
      - t[0] = d mod n;
      - for i >= 1, t[i] is minus the sum of t[j[l] + i - d] * a[l] over the
        terms with j[l] >= d - (i - 1), plus a[l] * i for a term with
        j[l] == d - i.
    NOTE(review): these weights are presumably the traces of the powers of
    the generator (Newton identities) - confirm.

    op holds len coefficients.  Only the first min(len, d) of them are read;
    anything past len is not part of the operand and must not contribute.

    The indices are signed: the inner loop counts l down and tests l >= 0,
    which is vacuous for an unsigned type and would let l wrap around and
    index ctx->j far out of range if no term stopped the loop earlier.
*/
void _fq_nmod_trace(fmpz_t rop2, const mp_limb_t *op, slong len, const fq_nmod_ctx_t ctx)
{
    const slong d = fq_nmod_ctx_degree(ctx);
    const slong used = (len < d) ? len : d;
    slong i, l;
    mp_limb_t *t, rop;

    t = _nmod_vec_init(d);
    _nmod_vec_zero(t, d);

    t[0] = n_mod2_preinv(d, ctx->mod.n, ctx->mod.ninv);

    for (i = 1; i < d; i++)
    {
        for (l = ctx->len - 2; l >= 0 && ctx->j[l] >= d - (i - 1); l--)
        {
            t[i] = n_addmod(t[i],
                            n_mulmod2_preinv(t[ctx->j[l] + i - d], ctx->a[l],
                                             ctx->mod.n, ctx->mod.ninv),
                            ctx->mod.n);
        }

        if (l >= 0 && ctx->j[l] == d - i)
        {
            t[i] = n_addmod(t[i],
                            n_mulmod2_preinv(ctx->a[l], i,
                                             ctx->mod.n, ctx->mod.ninv),
                            ctx->mod.n);
        }

        t[i] = n_negmod(t[i], ctx->mod.n);
    }

    rop = WORD(0);
    for (i = 0; i < used; i++)
    {
        rop = n_addmod(rop,
                       n_mulmod2_preinv(op[i], t[i], ctx->mod.n, ctx->mod.ninv),
                       ctx->mod.n);
    }

    _nmod_vec_clear(t);

    fmpz_set_ui(rop2, rop);
}
/*
    Returns a^exp modulo n by binary exponentiation (least significant bit
    first), using the precomputed inverse ninv of n.

    Returns 0 when n is 1 and 1 when exp is 0 (for n > 1).
*/
mp_limb_t n_powmod2_ui_preinv(mp_limb_t a, mp_limb_t exp, mp_limb_t n, mp_limb_t ninv)
{
    mp_limb_t result, base;

    if (n == UWORD(1))
        return UWORD(0);

    result = UWORD(1);
    base = a;

    while (exp != 0)
    {
        if (exp & 1)
            result = n_mulmod2_preinv(result, base, n, ninv);

        exp >>= 1;
        if (exp == 0)
            break;  /* the last squaring would be wasted */

        base = n_mulmod2_preinv(base, base, n, ninv);
    }

    return result;
}
/* Assumes poly1 and poly2 are not length 0 and 0 < trunc <= len1 + len2 - 1 */ void _nmod_poly_mullow_classical(mp_ptr res, mp_srcptr poly1, slong len1, mp_srcptr poly2, slong len2, slong trunc, nmod_t mod) { if (len1 == 1 || trunc == 1) /* Special case if the length of output is 1 */ { res[0] = n_mulmod2_preinv(poly1[0], poly2[0], mod.n, mod.ninv); } else /* Ordinary case */ { slong i; slong bits = FLINT_BITS - (slong) mod.norm; slong log_len = FLINT_BIT_COUNT(len2); if (2 * bits + log_len <= FLINT_BITS) { /* Set res[i] = poly1[i]*poly2[0] */ mpn_mul_1(res, poly1, FLINT_MIN(len1, trunc), poly2[0]); if (len2 != 1) { /* Set res[i+len1-1] = in1[len1-1]*in2[i] */ if (trunc > len1) mpn_mul_1(res + len1, poly2 + 1, trunc - len1, poly1[len1 - 1]); /* out[i+j] += in1[i]*in2[j] */ for (i = 0; i < FLINT_MIN(len1, trunc) - 1; i++) mpn_addmul_1(res + i + 1, poly2 + 1, FLINT_MIN(len2, trunc - i) - 1, poly1[i]); } _nmod_vec_reduce(res, res, trunc, mod); } else { /* Set res[i] = poly1[i]*poly2[0] */ _nmod_vec_scalar_mul_nmod(res, poly1, FLINT_MIN(len1, trunc), poly2[0], mod); if (len2 == 1) return; /* Set res[i+len1-1] = in1[len1-1]*in2[i] */ if (trunc > len1) _nmod_vec_scalar_mul_nmod(res + len1, poly2 + 1, trunc - len1, poly1[len1 - 1], mod); /* out[i+j] += in1[i]*in2[j] */ for (i = 0; i < FLINT_MIN(len1, trunc) - 1; i++) _nmod_vec_scalar_addmul_nmod(res + i + 1, poly2 + 1, FLINT_MIN(len2, trunc - i) - 1, poly1[i], mod); } } }
/*
    Writes to res the first n coefficients of the series whose coefficient
    at x^(k * power) is coeff^k / k!, with res[0] = 1 and zeros elsewhere.

    With r = (n - 1) / power, the inverse of r! is computed once; walking k
    from r down to 1 and multiplying the running value by k yields 1 / k!
    at each step.  When coeff is 1 these values are stored directly,
    otherwise the powers of coeff are laid down first and then scaled.

    NOTE(review): presumably requires r! to be invertible modulo mod.n and
    n >= 1 - confirm against callers.
*/
void _nmod_poly_exp_series_monomial_ui(mp_ptr res, mp_limb_t coeff, ulong power, slong n, nmod_t mod)
{
    slong k, r;
    mp_limb_t inv_fact;
    mp_limb_t run;

    r = (n - 1) / power;
    inv_fact = n_factorial_mod2_preinv(r, mod.n, mod.ninv);
    inv_fact = n_invmod(inv_fact, mod.n);

    if (power > 1)
        _nmod_vec_zero(res, n);

    res[0] = UWORD(1);

    if (coeff != UWORD(1))
    {
        /* lay down coeff^j at position j * power */
        run = coeff;
        k = power;
        while (k < n)
        {
            res[k] = run;
            run = n_mulmod2_preinv(run, coeff, mod.n, mod.ninv);
            k += power;
        }

        /* scale by 1 / j!, highest term first */
        run = inv_fact;
        k = r;
        while (k >= 1)
        {
            res[k * power] = n_mulmod2_preinv(res[k * power], run, mod.n, mod.ninv);
            run = n_mulmod2_preinv(run, k, mod.n, mod.ninv);
            k--;
        }
    }
    else
    {
        run = inv_fact;
        k = r;
        while (k >= 1)
        {
            res[k * power] = run;
            run = n_mulmod2_preinv(run, k, mod.n, mod.ninv);
            k--;
        }
    }
}
/*
    Writes to poly (n + 1 coefficients) the monic polynomial
    (x - xs[0]) * ... * (x - xs[n - 1]).

    n == 0 gives the constant 1.  For n < 20 the factors are multiplied in
    one at a time, growing the polynomial downwards from the top of poly.
    Larger inputs are split in two halves that are expanded recursively
    into a temporary buffer and multiplied together.
*/
void _nmod_poly_product_roots_nmod_vec(mp_ptr poly, mp_srcptr xs, slong n, nmod_t mod)
{
    if (n == 0)
    {
        poly[0] = UWORD(1);
        return;
    }

    if (n >= 20)
    {
        const slong m = (n + 1) / 2;
        mp_ptr tmp = _nmod_vec_init(n + 2);
        mp_ptr upper = tmp + m + 1;

        _nmod_poly_product_roots_nmod_vec(tmp, xs, m, mod);
        _nmod_poly_product_roots_nmod_vec(upper, xs + m, n - m, mod);
        _nmod_poly_mul(poly, tmp, m + 1, upper, n - m + 1, mod);

        _nmod_vec_clear(tmp);
        return;
    }

    {
        slong i, j;

        poly[n] = UWORD(1);
        poly[n - 1] = nmod_neg(xs[0], mod);

        for (i = 1; i < n; i++)
        {
            const slong lo = n - i;     /* lowest coefficient set so far */
            const mp_limb_t root = xs[i];

            poly[lo - 1] = nmod_neg(
                n_mulmod2_preinv(poly[lo], root, mod.n, mod.ninv), mod);

            for (j = 0; j < i - 1; j++)
            {
                poly[lo + j] = nmod_sub(poly[lo + j],
                    n_mulmod2_preinv(poly[lo + j + 1], root, mod.n, mod.ninv),
                    mod);
            }

            poly[n - 1] = nmod_sub(poly[n - 1], root, mod);
        }
    }
}
void _nmod_poly_divrem_basecase_3(mp_ptr Q, mp_ptr R, mp_ptr W, mp_srcptr A, slong lenA, mp_srcptr B, slong lenB, nmod_t mod) { const mp_limb_t invL = n_invmod(B[lenB - 1], mod.n); slong iR, i; mp_ptr B3 = W, R3 = W + 3*(lenB - 1), ptrQ = Q - lenB + 1; for (i = 0; i < lenB - 1; i++) { B3[3 * i] = B[i]; B3[3 * i + 1] = 0; B3[3 * i + 2] = 0; } for (i = 0; i < lenA; i++) { R3[3 * i] = A[i]; R3[3 * i + 1] = 0; R3[3 * i + 2] = 0; } for (iR = lenA - 1; iR >= lenB - 1; ) { mp_limb_t r = n_lll_mod_preinv(R3[3 * iR + 2], R3[3 * iR + 1], R3[3 * iR], mod.n, mod.ninv); while ((iR + 1 >= lenB) && (r == WORD(0))) { ptrQ[iR--] = WORD(0); if (iR + 1 >= lenB) r = n_lll_mod_preinv(R3[3 * iR + 2], R3[3 * iR + 1], R3[3 * iR], mod.n, mod.ninv); } if (iR + 1 >= lenB) { ptrQ[iR] = n_mulmod2_preinv(r, invL, mod.n, mod.ninv); if (lenB > 1) { const mp_limb_t c = n_negmod(ptrQ[iR], mod.n); mpn_addmul_1(R3 + 3 * (iR - lenB + 1), B3, 3 * lenB - 3, c); } iR--; } } for (iR = 0; iR < lenB - 1; iR++) R[iR] = n_lll_mod_preinv(R3[3 * iR + 2], R3[3 * iR + 1], R3[3 * iR], mod.n, mod.ninv); }
/*
    Reference implementation of n! modulo p: multiplies n, n - 1, ..., 1
    into a running product that starts at 1 mod p.
*/
static mp_limb_t n_factorial_mod2_foolproof(ulong n, mp_limb_t p, mp_limb_t pinv)
{
    mp_limb_t acc = UWORD(1) % p;
    ulong k;

    for (k = n; k != 0; k--)
        acc = n_mulmod2_preinv(acc, k, p, pinv);

    return acc;
}
/*
    Evaluates the integer polynomial (poly, len) at a modulo n by Horner's
    rule, reducing each coefficient modulo n with fmpz_fdiv_ui as it is
    consumed.  Returns 0 for an empty polynomial.
*/
mp_limb_t _fmpz_poly_evaluate_mod(const fmpz * poly, slong len, mp_limb_t a, mp_limb_t n, mp_limb_t ninv)
{
    mp_limb_t acc = 0;
    slong k;

    for (k = len; k-- != 0; )
    {
        const mp_limb_t coeff = fmpz_fdiv_ui(poly + k, n);

        acc = n_addmod(n_mulmod2_preinv(acc, a, n, ninv), coeff, n);
    }

    return acc;
}
/*
    Binary chain evaluation modulo n driven by the bits of m, using the
    precomputed inverse ninv.

    The state starts at (x, y) = (2, n - 3).  For each bit of m, most
    significant first, the value xy = x * y + 3 is formed; on a set bit the
    new state is (xy, y^2 - 2), on a clear bit it is (x^2 - 2, xy).  The
    state after the last bit is returned.

    For m == 0 there are no bits to process and the pair {0, 0} is
    returned.  That case is handled before the bit mask is built, because
    the mask would otherwise be computed with a shift count of -1, which
    is undefined.  The mask is formed from a limb-typed 1 so that the shift
    is performed at full limb width on every platform.
*/
n_pair_t fchain2_preinv(mp_limb_t m, mp_limb_t n, mp_limb_t ninv)
{
    n_pair_t current = {0, 0}, old;
    int length;
    mp_limb_t power, xy;

    if (m == 0)
        return current;

    old.x = 2UL;
    old.y = n - 3UL;

    length = FLINT_BIT_COUNT(m);
    power = ((mp_limb_t) 1) << (length - 1);

    for (; length > 0; length--)
    {
        xy = n_mulmod2_preinv(old.x, old.y, n, ninv);
        xy = n_addmod(xy, 3UL, n);

        if (m & power)
        {
            current.y = n_submod(n_mulmod2_preinv(old.y, old.y, n, ninv), 2UL, n);
            current.x = xy;
        }
        else
        {
            current.x = n_submod(n_mulmod2_preinv(old.x, old.x, n, ninv), 2UL, n);
            current.y = xy;
        }

        power >>= 1;
        old = current;
    }

    return current;
}
/*
    In-place LU decomposition of A by classical elimination; returns the
    rank.

    P is initialised to the identity permutation and updated by
    hmod_mat_pivot.  For each pivot found, the rows below it are reduced
    with the inverse of the pivot; the multiplier used for row i is stored
    in column rank - 1 of that row after the pivot column entry has been
    zeroed.  When no pivot exists in a column the function returns 0
    immediately if rank_check is set, and otherwise moves on to the next
    column.
*/
long hmod_mat_lu_classical(long * P, hmod_mat_t A, int rank_check)
{
    const long nrows = A->r;
    const long ncols = A->c;
    hlimb_t ** a = A->rows;
    nmod_t mod = A->mod;
    hlimb_t pivot_inv, mult;
    long i, rank = 0, row = 0, col, width;

    for (i = 0; i < nrows; i++)
        P[i] = i;

    for (col = 0; row < nrows && col < ncols; col++)
    {
        if (hmod_mat_pivot(A, P, row, col) == 0)
        {
            if (rank_check)
                return 0;
            continue;
        }

        rank++;

        pivot_inv = a[row][col];
        pivot_inv = n_invmod(pivot_inv, mod.n);
        width = ncols - col - 1;    /* entries to the right of the pivot */

        for (i = row + 1; i < nrows; i++)
        {
            mult = n_mulmod2_preinv(a[i][col], pivot_inv, mod.n, mod.ninv);

            if (width != 0)
                _hmod_vec_scalar_addmul_hmod(a[i] + col + 1,
                                             a[row] + col + 1,
                                             width, nmod_neg(mult, mod), mod);

            a[i][col] = 0;
            a[i][rank - 1] = mult;
        }

        row++;
    }

    return rank;
}
/*
    Division with remainder for the case where the quotient has two
    coefficients, i.e. the code reads A[lenA - 1] and A[lenA - 2] and writes
    Q[1] and Q[0].

    When lenB == 1 the quotient is simply A scaled by the inverse of B[0]
    and R is left untouched.  Otherwise Q[1] and Q[0] are computed directly,
    R is set to the low lenB - 1 coefficients of Q * B (accumulated
    unreduced when FLINT_BITS + 2 <= 2 * mod.norm, with modular vector
    operations otherwise) and finally replaced by A - R.
*/
void _nmod_poly_divrem_q1(mp_ptr Q, mp_ptr R,
                          mp_srcptr A, long lenA,
                          mp_srcptr B, long lenB, nmod_t mod)
{
    const mp_limb_t invL = (B[lenB-1] == 1) ? 1 : n_invmod(B[lenB-1], mod.n);
    const long tail = lenB - 1;
    mp_limb_t t;

    if (lenB == 1)
    {
        _nmod_vec_scalar_mul_nmod(Q, A, lenA, invL, mod);
        return;
    }

    Q[1] = n_mulmod2_preinv(A[lenA-1], invL, mod.n, mod.ninv);
    t = n_mulmod2_preinv(Q[1], B[lenB-2], mod.n, mod.ninv);
    t = n_submod(A[lenA-2], t, mod.n);
    Q[0] = n_mulmod2_preinv(t, invL, mod.n, mod.ninv);

    if (FLINT_BITS + 2 <= 2 * mod.norm)
    {
        /* two products per coefficient fit in a limb: reduce once at the end */
        mpn_mul_1(R, B, tail, Q[0]);
        if (lenB > 2)
            mpn_addmul_1(R + 1, B, lenB - 2, Q[1]);
        _nmod_vec_reduce(R, R, tail, mod);
    }
    else
    {
        _nmod_vec_scalar_mul_nmod(R, B, tail, Q[0], mod);
        if (lenB > 2)
            _nmod_vec_scalar_addmul_nmod(R + 1, B, lenB - 2, Q[1], mod);
    }

    _nmod_vec_sub(R, A, R, tail, mod);
}
/*
    Solves L * X = B by forward substitution, one column of B at a time,
    where only the entries of L on and below the diagonal are read.

    When unit is nonzero the diagonal of L is taken to be 1 and never read;
    otherwise the inverses of the diagonal entries are computed up front.
    Each column is built in a temporary vector and copied into X when
    complete.
*/
void nmod_mat_solve_tril_classical(nmod_mat_t X, const nmod_mat_t L, const nmod_mat_t B, int unit)
{
    const long dim = L->r;
    const long ncols = B->c;
    nmod_t mod = L->mod;
    mp_ptr diag_inv = NULL;
    mp_ptr column;
    int nlimbs;
    long row, col;

    if (!unit)
    {
        diag_inv = _nmod_vec_init(dim);
        for (row = 0; row < dim; row++)
            diag_inv[row] = n_invmod(nmod_mat_entry(L, row, row), mod.n);
    }

    nlimbs = _nmod_vec_dot_bound_limbs(dim, mod);
    column = _nmod_vec_init(dim);

    for (col = 0; col < ncols; col++)
    {
        for (row = 0; row < dim; row++)
            column[row] = nmod_mat_entry(X, row, col);

        for (row = 0; row < dim; row++)
        {
            /* dot product of the strictly-lower part of the row with the
               entries already solved */
            mp_limb_t s = _nmod_vec_dot(L->rows[row], column, row, mod, nlimbs);

            s = nmod_sub(nmod_mat_entry(B, row, col), s, mod);
            if (!unit)
                s = n_mulmod2_preinv(s, diag_inv[row], mod.n, mod.ninv);
            column[row] = s;
        }

        for (row = 0; row < dim; row++)
            nmod_mat_entry(X, row, col) = column[row];
    }

    _nmod_vec_clear(column);
    if (!unit)
        _nmod_vec_clear(diag_inv);
}
/*
    Schoolbook quotient of A (length A_len) by B (length B_len), one limb
    per working coefficient.

    The top A_len - B_len + 1 coefficients of A are copied into W.  From
    the top down, each working coefficient is reduced in place; zero gives
    a zero quotient coefficient, otherwise the quotient coefficient is
    stored and the matching multiple of the upper part of B is added to the
    lower working coefficients that still matter.  Only Q is produced.
*/
void _nmod_poly_div_basecase_1(mp_ptr Q, mp_ptr W,
                               mp_srcptr A, long A_len,
                               mp_srcptr B, long B_len, nmod_t mod)
{
    const mp_limb_t lead_inv = n_invmod(B[B_len - 1], mod.n);
    const long tail = B_len - 1;
    mp_srcptr Btop = B + tail;
    mp_ptr rem = W;
    long pos, span;

    mpn_copyi(rem, A + tail, A_len - B_len + 1);

    for (pos = A_len - B_len; pos >= 0; pos--)
    {
        mp_limb_t q, neg_q;

        rem[pos] = n_mod2_preinv(rem[pos], mod.n, mod.ninv);

        if (rem[pos] == 0L)
        {
            Q[pos] = 0L;
            continue;
        }

        q = n_mulmod2_preinv(rem[pos], lead_inv, mod.n, mod.ninv);
        Q[pos] = q;
        neg_q = n_negmod(q, mod.n);

        span = FLINT_MIN(tail, pos);
        if (span > 0)
            mpn_addmul_1(rem + (pos - span), Btop - span, span, neg_q);
    }
}
/*
    Formal integral: x_int[k] = x[k - 1] / k for k = len - 1 down to 1, and
    x_int[0] = 0.

    To save modular inversions, up to four consecutive indices share one
    inverse of their product when k is below the matching PROD_TAKE*
    threshold; each individual 1 / k is then recovered by multiplying that
    inverse with the product of the other indices via MUL3.

    Coefficients are written from the top down, each from the source
    coefficient one position below it.
*/
void _nmod_poly_integral(mp_ptr x_int, mp_srcptr x, slong len, nmod_t mod)
{
    mp_limb_t inv;
    slong k;

    for (k = len - 1; k > 0; )
    {
        if (k > 3 && k < PROD_TAKE4)
        {
            inv = n_invmod(k*(k-1)*(k-2)*(k-3), mod.n);
            x_int[k]   = MUL3(x[k-1], inv, (k-1)*(k-2)*(k-3));
            x_int[k-1] = MUL3(x[k-2], inv, k*(k-2)*(k-3));
            x_int[k-2] = MUL3(x[k-3], inv, k*(k-1)*(k-3));
            x_int[k-3] = MUL3(x[k-4], inv, k*(k-1)*(k-2));
            k -= 4;
            continue;
        }

        if (k > 2 && k < PROD_TAKE3)
        {
            inv = n_invmod(k*(k-1)*(k-2), mod.n);
            x_int[k]   = MUL3(x[k-1], inv, (k-1)*(k-2));
            x_int[k-1] = MUL3(x[k-2], inv, k*(k-2));
            x_int[k-2] = MUL3(x[k-3], inv, k*(k-1));
            k -= 3;
            continue;
        }

        if (k > 1 && k < PROD_TAKE2)
        {
            inv = n_invmod(k*(k-1), mod.n);
            x_int[k]   = MUL3(x[k-1], inv, k-1);
            x_int[k-1] = MUL3(x[k-2], inv, k);
            k -= 2;
            continue;
        }

        inv = n_invmod(k, mod.n);
        x_int[k] = n_mulmod2_preinv(x[k-1], inv, mod.n, mod.ninv);
        k -= 1;
    }

    x_int[0] = UWORD(0);
}
/*
    Schoolbook remainder of A (length lenA) by B (length lenB) using three
    limbs per working coefficient.  Does nothing when lenB <= 1.

    Layout of W: the low lenB - 1 coefficients of B widened to three limbs
    each, followed by all lenA coefficients of A widened the same way.
    From the top down to index lenB - 1, each working coefficient is
    reduced and, if nonzero, cancelled by adding the matching multiple of
    B.  The low lenB - 1 working coefficients are then reduced into R.
*/
void _nmod_poly_rem_basecase_3(mp_ptr R, mp_ptr W,
                               mp_srcptr A, long lenA,
                               mp_srcptr B, long lenB, nmod_t mod)
{
    const long tail = lenB - 1;
    mp_limb_t lead_inv;
    mp_ptr wideB, wideR;
    long k, pos;

    if (tail <= 0)
        return;

    lead_inv = n_invmod(B[tail], mod.n);
    wideB = W;
    wideR = W + 3 * tail;

    for (k = 0; k < tail; k++)
    {
        wideB[3 * k] = B[k];
        wideB[3 * k + 1] = 0;
        wideB[3 * k + 2] = 0;
    }

    for (k = 0; k < lenA; k++)
    {
        wideR[3 * k] = A[k];
        wideR[3 * k + 1] = 0;
        wideR[3 * k + 2] = 0;
    }

    for (pos = lenA - 1; pos >= tail; pos--)
    {
        mp_limb_t q, neg_q;
        const mp_limb_t r = n_lll_mod_preinv(wideR[3*pos + 2], wideR[3*pos + 1],
                                             wideR[3*pos], mod.n, mod.ninv);

        if (r == 0)
            continue;

        q = n_mulmod2_preinv(r, lead_inv, mod.n, mod.ninv);
        neg_q = n_negmod(q, mod.n);
        mpn_addmul_1(wideR + 3 * (pos - tail), wideB, 3 * lenB - 3, neg_q);
    }

    for (k = 0; k < tail; k++)
        R[k] = n_lll_mod_preinv(wideR[3 * k + 2], wideR[3 * k + 1],
                                wideR[3 * k], mod.n, mod.ninv);
}
void fmpz_mat_det_modular_given_divisor_8arg(mpz_t det, nmod_mat_t Amod, mpfr_t hadamard_log2, mpfr_prec_t pr, p_k_pk_t* pp, n_primes_rev_t iT, mp_limb_t xmod, const fmpz_mat_t A) /* act like fmpz_mat_det_modular_given_divisor_4block(), but * decrease primes using iT, rather than increase them * don't count H.B., use hadamard_log2 which is 1+log2(H.B.) * sum logarithms instead of multiplying together found primes * re-use found prime pp->p and xmod which is determinant of A modulo pp->p hadamard_log2 on entry is upper bound on log2(2*H.B) on exit, decreased to an unspecified value iT on entry just found prime pp->p possibly gets shifted */ { // loop bound = 2*H.B / known det divisor decrease_bound_mpz(hadamard_log2,pr,det); #if 0 flint_printf("det modulo %llX = %llX\n",pp->p_deg_k,xmod); #endif // re-use known det A modulo pp->p_deg_k mp_limb_t divisor_inv=invert_det_divisor_modulo_pk(det,pp,&Amod->mod); xmod=n_mulmod2_preinv(xmod,divisor_inv, Amod->mod.n,Amod->mod.ninv); fmpz_t xnew,x; fmpz_init(xnew); fmpz_init(x); fmpz_t prod; fmpz_init_set_ui(prod, UWORD(1) ); fmpz_CRT_ui(xnew, x, prod, xmod, pp->p_deg_k, 1); fmpz_set_ui(prod, pp->p_deg_k); fmpz_set(x, xnew); #if LOUD_DET_BOUND mpfr_printf("fmpz_mat_det_modular_given_divisor_8arg(): log2 bound=%Rf\n", hadamard_log2); slong primes_used=1; #endif // for orthogonal matrice the bound might be reached at this point. 
// Attempt to skip main loop if(comp_bound_ui(hadamard_log2,pp->p_deg_k)) { mp_limb_t* scratch=flint_malloc( 4*(A->r-4)*sizeof(mp_limb_t) ); mp_limb_t bound=mpfr_get_uj(hadamard_log2,MPFR_RNDU); for(;;) { divisor_inv=choose_prime_and_degree( pp, &Amod->mod, iT, det ); // TODO: optimize fmpz_mat_get_nmod_mat() fmpz_mat_get_nmod_mat(Amod, A); // TODO: call a faster subroutine instead of nmod_mat_det_mod_pk_4block() // when pp->p is 64 bit long xmod=nmod_mat_det_mod_pk_4block(Amod,pp[0],scratch); xmod=n_mulmod2_preinv(xmod,divisor_inv, Amod->mod.n,Amod->mod.ninv); // TODO: rewrite fmpz_CRT_ui() -> mpz_CRT_ui_5arg() fmpz_CRT_ui(xnew, x, prod, xmod, pp->p_deg_k, 1); fmpz_mul_ui(prod, prod, pp->p_deg_k); #if LOUD_DET_BOUND primes_used++; #endif if(cmp_positive_log2(prod,bound) >= 0) break; fmpz_set(x, xnew); } flint_free(scratch); } #if LOUD_DET_BOUND flint_printf("fmpz_mat_det_modular_given_divisor_8arg() primes used: %d\n\n\n", primes_used); #endif fmpz_clear(prod); mpz_fmpz_mul_det_2arg(det,xnew); fmpz_clear(prod); fmpz_clear(x); fmpz_clear(xnew); }
int main() { flint_rand_t state; slong nmax, n, bound, count; mp_limb_t p, pinv, m1, m2; nmod_poly_t A; flint_printf("rev...."); fflush(stdout); flint_randinit(state); bound = 100000; p = n_nextprime(UWORD(1) << (FLINT_BITS - 1), 0); pinv = n_preinvert_limb(p); nmod_poly_init(A, p); nmod_poly_set_coeff_ui(A, 1, 1); nmod_poly_exp_series(A, A, bound); nmod_poly_shift_right(A, A, 1); nmod_poly_inv_series(A, A, bound); m1 = 1; for (n = 0; n < A->length; n++) { A->coeffs[n] = n_mulmod2_preinv(A->coeffs[n], m1, p, pinv); m1 = n_mulmod2_preinv(m1, n + 1, p, pinv); } for (nmax = 0; nmax < bound; nmax = 1.5 * nmax + 2) { fmpz_t numer, denom; bernoulli_rev_t iter; fmpz_init(numer); fmpz_init(denom); nmax += (nmax % 2); bernoulli_rev_init(iter, nmax); if (nmax < 8000) count = 4000; else count = 100; /* flint_printf("nmax = %wd, count = %wd\n", nmax, count); */ for (n = nmax; n >= 0 && count > 0; n -= 2, count--) { bernoulli_rev_next(numer, denom, iter); m1 = fmpz_fdiv_ui(numer, p); m2 = fmpz_fdiv_ui(denom, p); m2 = n_invmod(m2, p); m1 = n_mulmod2_preinv(m1, m2, p, pinv); m2 = nmod_poly_get_coeff_ui(A, n); if (m1 != m2) { flint_printf("FAIL:\n"); flint_printf("nmax = %wd, n = %wd\n", nmax, n); flint_printf("m1 = %wu mod %wu\n", m1, p); flint_printf("m2 = %wu mod %wu\n", m2, p); abort(); } } bernoulli_rev_clear(iter); fmpz_clear(numer); fmpz_clear(denom); } flint_randclear(state); flint_cleanup(); flint_printf("PASS\n"); return EXIT_SUCCESS; }
/*
    Square root of a modulo p.

    Returns a itself when a <= 1 and 0 when n_jacobi_unsigned(a, p) is -1.
    When p is 3 modulo 4 the root is a^((p + 1) / 4).  Otherwise p - 1 is
    written as 2^r * p1 with p1 odd, the smallest k >= 2 with Jacobi symbol
    -1 is located, and the candidate a^((p1 + 1) / 2) is corrected
    iteratively with powers of g = k^p1 until b = a^p1 has been driven to 1
    (the structure matches the Tonelli-Shanks algorithm).

    NOTE(review): p is presumably required to be prime - confirm.
*/
mp_limb_t n_sqrtmod(mp_limb_t a, mp_limb_t p)
{
    slong step, r, m;
    mp_limb_t odd_part, nonres, b, g, bpow, gpow, root;
    mp_limb_t pinv;

    if (a <= 1)
        return a;

    pinv = n_preinvert_limb(p);

    if (n_jacobi_unsigned(a, p) == -1)
        return 0;

    if ((p & UWORD(3)) == 3)
        return n_powmod2_ui_preinv(a, (p + 1) / 4, p, pinv);

    /* p - 1 = 2^r * odd_part */
    r = 1;
    odd_part = (p - 1) >> UWORD(1);
    while ((odd_part & UWORD(1)) == 0)
    {
        odd_part >>= UWORD(1);
        r++;
    }

    b = n_powmod2_ui_preinv(a, odd_part, p, pinv);

    /* smallest non-residue */
    nonres = 2;
    while (n_jacobi_unsigned(nonres, p) != -1)
        nonres++;

    g = n_powmod2_ui_preinv(nonres, odd_part, p, pinv);
    root = n_powmod2_ui_preinv(a, (odd_part + 1) / 2, p, pinv);

    while (b != 1)
    {
        /* m = number of squarings performed on b: stops when it becomes 1
           or when r squarings have been done */
        bpow = n_mulmod2_preinv(b, b, p, pinv);
        for (m = 1; m < r && bpow != 1; m++)
            bpow = n_mulmod2_preinv(bpow, bpow, p, pinv);

        /* gpow = g^(2^(r - m - 1)) */
        gpow = g;
        for (step = r - m - 1; step > 0; step--)
            gpow = n_mulmod2_preinv(gpow, gpow, p, pinv);

        root = n_mulmod2_preinv(root, gpow, p, pinv);
        g = n_mulmod2_preinv(gpow, gpow, p, pinv);
        b = n_mulmod2_preinv(b, g, p, pinv);
        r = m;
    }

    return root;
}