/*
    Quotient-only schoolbook division of (A, A_len) by (B, B_len) modulo
    mod.n.  Writes Q[0 .. A_len - B_len].

    Partial remainders are held as unreduced two-limb values (low limb,
    high limb) and are only reduced modulo mod.n when a coefficient is
    actually needed.

    Scratch layout in W, as indexed by this function:
      - limbs [0, 2*(B_len - 1)):  the B_len - 1 non-leading coefficients
        of B, each widened to a (value, 0) pair;
      - the following 2*(A_len - B_len + 1) limbs: the top
        A_len - B_len + 1 coefficients of A, widened the same way.
    In total 2*A_len limbs of W are touched.

    NOTE(review): the carry returned by mpn_addmul_1 is ignored, so the
    caller presumably guarantees that the accumulated sums fit in two
    limbs -- confirm against the dispatching code.
*/
void _nmod_poly_div_basecase_2(mp_ptr Q, mp_ptr W, mp_srcptr A, long A_len,
                               mp_srcptr B, long B_len, nmod_t mod)
{
    const long low_len = B_len - 1;   /* number of non-leading terms of B */
    const long top = A_len - B_len;   /* index of the leading quotient term */
    mp_limb_t lead_inv = n_invmod(B[B_len - 1], mod.n);
    mp_ptr B2 = W;
    mp_ptr R2 = W + 2 * low_len;
    mp_srcptr Btop = B2 + 2 * low_len;   /* one past the widened copy of B */
    long k, coeff;

    /* Widen the non-leading coefficients of B to two limbs each. */
    for (k = 0; k < low_len; k++)
    {
        B2[2 * k] = B[k];
        B2[2 * k + 1] = 0;
    }

    /* Widen the part of A that can influence the quotient. */
    for (k = 0; k <= top; k++)
    {
        R2[2 * k] = A[low_len + k];
        R2[2 * k + 1] = 0;
    }

    /* Produce quotient coefficients from the highest index downwards. */
    for (coeff = top; coeff >= 0; coeff--)
    {
        const mp_limb_t r_coeff = n_ll_mod_preinv(R2[2 * coeff + 1],
                                                  R2[2 * coeff],
                                                  mod.n, mod.ninv);

        if (r_coeff == 0L)
        {
            Q[coeff] = 0L;
        }
        else
        {
            /* Only the top `len` widened terms of B reach indices >= 0. */
            const long len = FLINT_MIN(low_len, coeff);
            mp_limb_t c;

            Q[coeff] = n_mulmod2_preinv(r_coeff, lead_inv, mod.n, mod.ninv);
            c = n_negmod(Q[coeff], mod.n);

            /* Subtract Q[coeff] * B by adding (-Q[coeff] mod n) * B. */
            if (len > 0)
                mpn_addmul_1(R2 + 2 * (coeff - len), Btop - 2 * len,
                             2 * len, c);
        }
    }
}
void _nmod_poly_divrem_basecase_2(mp_ptr Q, mp_ptr R, mp_ptr W, mp_srcptr A, slong lenA, mp_srcptr B, slong lenB, nmod_t mod) { const mp_limb_t invL = n_invmod(B[lenB - 1], mod.n); slong iR, i; mp_ptr B2 = W, R2 = W + 2*(lenB - 1), ptrQ = Q - lenB + 1; for (i = 0; i < lenB - 1; i++) { B2[2 * i] = B[i]; B2[2 * i + 1] = 0; } for (i = 0; i < lenA; i++) { R2[2 * i] = A[i]; R2[2 * i + 1] = 0; } for (iR = lenA - 1; iR >= lenB - 1; ) { mp_limb_t r = n_ll_mod_preinv(R2[2 * iR + 1], R2[2 * iR], mod.n, mod.ninv); while ((iR + 1 >= lenB) && (r == WORD(0))) { ptrQ[iR--] = WORD(0); if (iR + 1 >= lenB) r = n_ll_mod_preinv(R2[2 * iR + 1], R2[2 * iR], mod.n, mod.ninv); } if (iR + 1 >= lenB) { ptrQ[iR] = n_mulmod2_preinv(r, invL, mod.n, mod.ninv); if (lenB > 1) { const mp_limb_t c = n_negmod(ptrQ[iR], mod.n); mpn_addmul_1(R2 + 2 * (iR - lenB + 1), B2, 2 * lenB - 2, c); } iR--; } } for (iR = 0; iR < lenB - 1; iR++) R[iR] = n_ll_mod_preinv(R2[2*iR+1], R2[2*iR], mod.n, mod.ninv); }
/*
    Randomised test for n_lll_mod_preinv: the three-limb reduction of
    (nh, nm, nl) modulo d must agree with two chained two-limb reductions
    performed with n_ll_mod_preinv.
*/
int main(void)
{
    int i, result;
    flint_rand_t state;

    printf("lll_mod_preinv....");
    fflush(stdout);

    flint_randinit(state);

    for (i = 0; i < 1000000; i++)
    {
        mp_limb_t d, dinv, nh, nm, nl, t, r1, r2;

        d = n_randtest_not_zero(state);
        /* Draw and discard one value so that the amount of random state
           consumed per iteration is unchanged. */
        (void) n_randtest(state);
        nh = n_randint(state, d);   /* top limb is chosen below d */
        nm = n_randtest(state);
        nl = n_randtest(state);

        dinv = n_preinvert_limb(d);

        r2 = n_lll_mod_preinv(nh, nm, nl, d, dinv);

        /* Reference value: reduce (nh, nm) first, then (t, nl).  The
           intermediate goes into t so that nm still holds the original
           input if the failure message below has to print it. */
        t = n_ll_mod_preinv(nh, nm, d, dinv);
        r1 = n_ll_mod_preinv(t, nl, d, dinv);

        result = (r1 == r2);
        if (!result)
        {
            printf("FAIL:\n");
            printf("nh = %lu, nm = %lu, nl = %lu, d = %lu, dinv = %lu\n",
                   nh, nm, nl, d, dinv);
            printf("r1 = %lu, r2 = %lu\n", r1, r2);
            abort();
        }
    }

    flint_randclear(state);

    printf("PASS\n");
    return 0;
}
/*
    For each of the qs_inf->s entries listed in qs_inf->A_ind, look up the
    factor-base prime p and store

        soln1[index] = ( ((D - B^2) / p) * (B * A_modp[j])^(-1)
                         + sieve_size/2 )  mod p
        soln2[index] = -1 (all bits set)

    where D is the two-limb value (hi, lo) reduced modulo p^2 and B is
    reduced modulo p^2 with its sign taken from its top bit.  All
    intermediate arithmetic is done in single unsigned limbs, with "negative"
    values detected by casting to mp_limb_signed_t.

    NOTE(review): presumably these are the primes dividing the polynomial
    coefficient A, for which there is a single sieve root and soln2 is a
    "no second root" marker -- confirm against the sieving code.
*/
void qsieve_ll_compute_A_factor_offsets(qs_t qs_inf)
{
    const long s = qs_inf->s;
    mp_limb_t * A_ind = qs_inf->A_ind;
    mp_limb_t * A_modp = qs_inf->A_modp;
    mp_limb_t * soln1 = qs_inf->soln1;
    mp_limb_t * soln2 = qs_inf->soln2;
    mp_limb_t * inv_p2 = qs_inf->inv_p2;
    prime_t * factor_base = qs_inf->factor_base;
    const mp_limb_t hi = qs_inf->hi;
    const mp_limb_t lo = qs_inf->lo;
    const mp_limb_t B = qs_inf->B;
    long k;

    for (k = 0; k < s; k++)
    {
        const mp_limb_t index = A_ind[k];
        const mp_limb_t p = factor_base[index].p;
        const mp_limb_t pinv = factor_base[index].pinv;
        const mp_limb_t p2 = p * p;
        const mp_limb_t p2inv = inv_p2[k];
        mp_limb_t D, B_modp2, inv, off;

        D = n_ll_mod_preinv(hi, lo, p2, p2inv);

        /* B mod p^2, treating B as a signed limb. */
        if ((mp_limb_signed_t) B < 0)
        {
            B_modp2 = p2 - n_mod2_preinv(-B, p2, p2inv);
            if (B_modp2 == p2)
                B_modp2 = 0;
        }
        else
        {
            B_modp2 = n_mod2_preinv(B, p2, p2inv);
        }

        /* Inverse of (B * A_modp) modulo p. */
        inv = n_mod2_preinv(B_modp2 * A_modp[k], p, pinv);
        inv = n_invmod(inv, p);

        /* (D - B^2) / p, dividing the magnitude and restoring the sign. */
        D -= (B_modp2 * B_modp2);
        off = ((mp_limb_signed_t) D < 0)
            ? -(-D / p)   /* TODO consider using precomputed inverse */
            : (D / p);    /* TODO consider using precomputed inverse */

        off *= inv;
        off += qs_inf->sieve_size/2;

        /* Bring the (possibly negative) offset into [0, p). */
        if ((mp_limb_signed_t) off < 0)
        {
            off = p - n_mod2_preinv(-off, p, pinv);
            if (off == p)
                off = 0;
        }
        else
        {
            off = n_mod2_preinv(off, p, pinv);
        }

        soln1[index] = off;
        soln2[index] = -1;
    }
}
/*
    Remainder-only schoolbook division of (A, lenA) by (B, lenB) modulo
    mod.n.  Writes R[0 .. lenB - 2]; does nothing at all when lenB <= 1.

    Partial remainders are kept as unreduced two-limb values (low limb,
    high limb) and reduced modulo mod.n only when a coefficient is read.

    Scratch layout in W, as indexed by this function:
      - limbs [0, 2*(lenB - 1)): the lenB - 1 non-leading coefficients of
        B, each widened to a (value, 0) pair;
      - the following 2*lenA limbs: all coefficients of A, widened the
        same way.

    NOTE(review): the carry returned by mpn_addmul_1 is ignored, so the
    caller presumably guarantees that the accumulated sums fit in two
    limbs -- confirm against the dispatching code.
*/
void _nmod_poly_rem_basecase_2(mp_ptr R, mp_ptr W, mp_srcptr A, long lenA,
                               mp_srcptr B, long lenB, nmod_t mod)
{
    mp_limb_t lead_inv;
    mp_ptr Bw, Rw;
    long k, top;

    /* A constant divisor leaves an empty remainder. */
    if (lenB <= 1)
        return;

    lead_inv = n_invmod(B[lenB - 1], mod.n);
    Bw = W;
    Rw = W + 2*(lenB - 1);

    /* Widen the non-leading coefficients of B to two limbs each. */
    k = 0;
    while (k < lenB - 1)
    {
        Bw[2 * k] = B[k];
        Bw[2 * k + 1] = 0;
        k++;
    }

    /* Widen all of A; it becomes the running remainder. */
    k = 0;
    while (k < lenA)
    {
        Rw[2 * k] = A[k];
        Rw[2 * k + 1] = 0;
        k++;
    }

    /* Eliminate coefficients from index lenA - 1 down to lenB - 1. */
    top = lenA - 1;
    while (top >= lenB - 1)
    {
        const mp_limb_t lead = n_ll_mod_preinv(Rw[2 * top + 1], Rw[2 * top],
                                               mod.n, mod.ninv);

        if (lead != 0)
        {
            /* Subtract quot * B by adding (-quot mod n) * B. */
            const mp_limb_t quot = n_mulmod2_preinv(lead, lead_inv,
                                                    mod.n, mod.ninv);
            const mp_limb_t neg = n_negmod(quot, mod.n);

            mpn_addmul_1(Rw + 2 * (top - lenB + 1), Bw, 2 * lenB - 2, neg);
        }

        top--;
    }

    /* Reduce the surviving low lenB - 1 two-limb values into R. */
    for (k = 0; k < lenB - 1; k++)
        R[k] = n_ll_mod_preinv(Rw[2*k + 1], Rw[2*k], mod.n, mod.ninv);
}
/*
    Randomised test for n_mulmod_precomp: for a modulus d of 1 to
    FLINT_D_BITS bits and operands a, b already reduced modulo d, the
    result must equal the full two-limb product a*b reduced with
    n_ll_mod_preinv.
*/
int main(void)
{
    int i;
    FLINT_TEST_INIT(state);

    flint_printf("mulmod_precomp....");
    fflush(stdout);

    i = 0;
    while (i < 100000 * flint_test_multiplier())
    {
        mp_limb_t a, b, d, dinv, bits;
        mp_limb_t prod_hi, prod_lo;
        mp_limb_t got, expected;
        double dpre;

        /* Modulus with a random bit length in [1, FLINT_D_BITS]. */
        bits = n_randint(state, FLINT_D_BITS) + 1;
        d = n_randtest_bits(state, bits);

        a = n_randtest(state) % d;
        b = n_randtest(state) % d;

        /* Value under test: floating-point precomputed inverse. */
        dpre = n_precompute_inverse(d);
        got = n_mulmod_precomp(a, b, d, dpre);

        /* Reference: exact double-limb product, then integer reduction. */
        umul_ppmm(prod_hi, prod_lo, a, b);
        dinv = n_preinvert_limb(d);
        expected = n_ll_mod_preinv(prod_hi, prod_lo, d, dinv);

        if (got != expected)
        {
            flint_printf("FAIL:\n");
            flint_printf("a = %wu, b = %wu, d = %wu, dinv = %f\n", a, b, d, dpre);
            flint_printf("r1 = %wu, r2 = %wu\n", got, expected);
            abort();
        }

        i++;
    }

    FLINT_TEST_CLEANUP(state);

    flint_printf("PASS\n");
    return 0;
}