/*
   One level of recursive Karatsuba multiplication on operands that are
   stored in revbin format.

   rev1, rev2  operands, 2^bits coefficients each.
   out         receives the product: 2 * 2^bits coefficients are written
               (out[0] and out[1] in the length-1 base case).
   temp        scratch space.  Its first 2^bits entries hold the two
               half-sums and, later, the product of the second halves;
               every recursive call is handed temp + 2^bits as scratch.
               NOTE(review): the historical description asks for room for
               2^bits coefficients, yet for bits >= 2 the recursive calls
               write at and beyond temp + 2^bits -- confirm the size the
               caller actually allocates.
   bits        log2 of the operand length.
 */
void
_fmpz_poly_mul_kara_recursive(fmpz * out, fmpz * rev1, fmpz * rev2,
                              fmpz * temp, long bits)
{
    const long len = (1L << bits);
    const long half = len / 2;
    fmpz *sum1, *sum2, *scratch, *cross;

    /* Base case: a single coefficient product, padded with a zero */
    if (len == 1)
    {
        fmpz_mul(out, rev1, rev2);
        fmpz_zero(out + 1);
        return;
    }

    sum1 = temp;                /* first half + second half of rev1 */
    sum2 = temp + half;         /* first half + second half of rev2 */
    scratch = temp + 2 * half;  /* workspace for the recursive calls */
    cross = out + len;          /* upper half of the output buffer */

    _fmpz_vec_add(sum1, rev1, rev1 + half, half);
    _fmpz_vec_add(sum2, rev2, rev2 + half, half);

    /* Product of the first halves goes to the lower half of out */
    _fmpz_poly_mul_kara_recursive(out, rev1, rev2, scratch, bits - 1);

    /* Product of the half-sums goes to the upper half of out */
    _fmpz_poly_mul_kara_recursive(cross, sum1, sum2, scratch, bits - 1);

    /* Product of the second halves overwrites the (now unneeded) sums */
    _fmpz_poly_mul_kara_recursive(temp, rev1 + half, rev2 + half,
                                  scratch, bits - 1);

    /* cross := (sum product) - (first-half product) - (second-half product) */
    _fmpz_vec_sub(cross, cross, out, len);
    _fmpz_vec_sub(cross, cross, temp, len);

    /* Fold the second-half product held in temp into out */
    _fmpz_vec_add_rev(out, temp, bits);
}
/*
   Computes the lifted factors G and H from f, the current factors g, h
   and the cofactors a, b, using the moduli p and p1.  As the name says,
   the cofactors themselves are not lifted here.

   Steps performed by the visible code:
     1. C = g * h
     2. C = f - C                 (over lenF coefficients)
     3. D = C / p                 (exact scalar division)
     4. C = D mod p1
     5. lift(G, g, lenG, b, lenB) and lift(H, h, lenH, a, lenA)

   NOTE(review): `lift` is a macro defined outside this block.  The locals
   `one`, `M`, `E` (and presumably `C`, `D`, `p`, `p1`, `lenF`) are not
   otherwise used here, so the macro presumably refers to them by name --
   do not rename them without checking the macro.

   NOTE(review): the product g * h is written into C, which is followed
   directly by D in the same allocation; this presumably relies on
   lenG + lenH - 1 <= lenF -- confirm against the callers.
 */
void _fmpz_poly_hensel_lift_without_inverse(fmpz *G, fmpz *H,
    const fmpz *f, long lenF,
    const fmpz *g, long lenG, const fmpz *h, long lenH,
    const fmpz *a, long lenA, const fmpz *b, long lenB,
    const fmpz_t p, const fmpz_t p1)
{
    /* The constant 1 as a one-element fmpz array */
    const fmpz one[1] = {1l};
    /* Sizes of the sub-buffers M, E and D carved out of the block below */
    const long lenM = FLINT_MAX(lenG, lenH);
    const long lenE = FLINT_MAX(lenG + lenB - 2, lenH + lenA - 2);
    const long lenD = FLINT_MAX(lenE, lenF);
    fmpz *C, *D, *E, *M;

    /* One allocation, partitioned consecutively as C | D | E | M */
    C = _fmpz_vec_init(lenF + lenD + lenE + lenM);
    D = C + lenF;
    E = D + lenD;
    M = E + lenE;

    /* C = g * h; the longer operand is passed first to _fmpz_poly_mul */
    if (lenG >= lenH)
        _fmpz_poly_mul(C, g,lenG, h, lenH);
    else
        _fmpz_poly_mul(C, h, lenH, g, lenG);

    /* C = f - g h, then D = C / p (exact), then C = D mod p1 */
    _fmpz_vec_sub(C, f, C, lenF);
    _fmpz_vec_scalar_divexact_fmpz(D, C, lenF, p);
    _fmpz_vec_scalar_mod_fmpz(C, D, lenF, p1);

    /* Produce G from (g, b) and H from (h, a) via the external macro */
    lift(G, g, lenG, b, lenB);

    lift(H, h, lenH, a, lenA);

    /* Release the single block that backs C, D, E and M */
    _fmpz_vec_clear(C, lenF + lenD + lenE + lenM);
}
/*
   Test program for _fmpz_vec_scalar_submul_si: the in-place result is
   checked against the same value computed as b - x * a using
   _fmpz_vec_scalar_mul_si followed by _fmpz_vec_sub.
 */
int
main(void)
{
    int iter, ok;
    FLINT_TEST_INIT(state);

    flint_printf("scalar_submul_si....");
    fflush(stdout);

    /* Compare with alternative method of computation */
    for (iter = 0; iter < 1000 * flint_test_multiplier(); iter++)
    {
        fmpz *src, *got, *want, *prod;
        slong len, x;

        len = n_randint(state, 100);

        src = _fmpz_vec_init(len);
        got = _fmpz_vec_init(len);
        want = _fmpz_vec_init(len);
        prod = _fmpz_vec_init(len);

        _fmpz_vec_randtest(src, state, len, 200);
        _fmpz_vec_randtest(got, state, len, 200);
        /* Keep a copy of the accumulator before it is modified */
        _fmpz_vec_set(want, got, len);
        x = z_randtest(state);

        /* Function under test: got -= x * src */
        _fmpz_vec_scalar_submul_si(got, src, len, x);

        /* Reference: want -= (x * src), in two separate steps */
        _fmpz_vec_scalar_mul_si(prod, src, len, x);
        _fmpz_vec_sub(want, want, prod, len);

        ok = (_fmpz_vec_equal(got, want, len));
        if (!ok)
        {
            flint_printf("FAIL:\n");
            flint_printf("x = %wd\n", x);
            _fmpz_vec_print(got, len);
            flint_printf("\n\n");
            _fmpz_vec_print(want, len);
            flint_printf("\n\n");
            abort();
        }

        _fmpz_vec_clear(src, len);
        _fmpz_vec_clear(got, len);
        _fmpz_vec_clear(want, len);
        _fmpz_vec_clear(prod, len);
    }

    FLINT_TEST_CLEANUP(state);

    flint_printf("PASS\n");
    return 0;
}
/*
   Power series reversion by Newton iteration: writes n coefficients
   to Qinv.

   For n <= 2 the first n coefficients of Q are copied unchanged.
   Otherwise a schedule of precisions is built by repeatedly replacing k
   with (k + 1) / 2 while k >= FLINT_REVERSE_NEWTON_CUTOFF, the smallest
   precision is handled by _fmpz_poly_revert_series_lagrange, and each
   larger precision k is reached with the update

       Qinv -= (Q(Qinv) with its x coefficient cleared) / (Q(Qinv))' * Qinv'

   truncated to k terms.

   Qinv and Q must each provide room for n coefficients.
 */
void
_fmpz_poly_revert_series_newton(fmpz * Qinv, const fmpz * Q, long n)
{
    long *prec, steps, level, k;
    fmpz *T, *U, *V;

    if (n <= 2)
    {
        _fmpz_vec_set(Qinv, Q, n);
        return;
    }

    T = _fmpz_vec_init(n);
    U = _fmpz_vec_init(n);
    V = _fmpz_vec_init(n);

    /* Room for the precision schedule: smallest steps with 2^steps >= n */
    k = n;
    steps = 1;
    while ((1L << steps) < k)
        steps++;
    prec = (long *) flint_malloc(steps * sizeof(long));

    /* prec[0] = n, then successive halvings (rounded up) */
    level = 0;
    prec[0] = k;
    while (k >= FLINT_REVERSE_NEWTON_CUTOFF)
    {
        k = (k + 1) / 2;
        prec[++level] = k;
    }

    /* Base case at the smallest precision, remaining coefficients zeroed */
    _fmpz_poly_revert_series_lagrange(Qinv, Q, k);
    _fmpz_vec_zero(Qinv + k, n - k);

    /* Walk the schedule back up; the last entry was the base case itself */
    for (level = level - 1; level >= 0; level--)
    {
        k = prec[level];

        /* T = Q(Qinv) mod x^k */
        _fmpz_poly_compose_series(T, Q, k, Qinv, k, k);

        /* U = T', padded with a zero to k coefficients */
        _fmpz_poly_derivative(U, T, k);
        fmpz_zero(U + k - 1);

        /* Clear the coefficient of x in T, then V = T / U */
        fmpz_zero(T + 1);
        _fmpz_poly_div_series(V, T, U, k);

        /* T is reused for Qinv'; U = V * Qinv' mod x^k */
        _fmpz_poly_derivative(T, Qinv, k);
        _fmpz_poly_mullow(U, V, k, T, k, k);

        _fmpz_vec_sub(Qinv, Qinv, U, k);
    }

    flint_free(prec);
    _fmpz_vec_clear(T, n);
    _fmpz_vec_clear(U, n);
    _fmpz_vec_clear(V, n);
}
/*
   Subtracts x times (vec2, len2) from (vec1, len2) in place.

   A multiprecision x is applied entry by entry with fmpz_submul.  A small
   x is dispatched on its value: 0 leaves vec1 untouched, 1 and -1 reduce
   to a vector subtraction and addition respectively, and any other small
   value is forwarded to _fmpz_vec_scalar_submul_si.
 */
void
_fmpz_vec_scalar_submul_fmpz(fmpz * vec1, const fmpz * vec2, slong len2,
                             const fmpz_t x)
{
    const fmpz scalar = *x;

    /* Large scalar: no shortcut applies, go entry by entry */
    if (COEFF_IS_MPZ(scalar))
    {
        slong idx;

        for (idx = 0; idx < len2; idx++)
            fmpz_submul(vec1 + idx, vec2 + idx, x);
        return;
    }

    /* Small scalar: handle the trivial multipliers without multiplying */
    if (scalar == 0)
        return;

    if (scalar == 1)
    {
        _fmpz_vec_sub(vec1, vec1, vec2, len2);
        return;
    }

    if (scalar == -1)
    {
        _fmpz_vec_add(vec1, vec1, vec2, len2);
        return;
    }

    _fmpz_vec_scalar_submul_si(vec1, vec2, len2, scalar);
}
int main(void) { int i, result; flint_rand_t state; printf("scalar_submul_fmpz...."); fflush(stdout); flint_randinit(state); /* Compare with fmpz_vec_scalar_submul_si */ for (i = 0; i < 10000; i++) { fmpz *a, *b, *c; long len, n; fmpz_t n1; len = n_randint(state, 100); n = (long) n_randbits(state, FLINT_BITS - 1); if (n_randint(state, 2)) n = -n; fmpz_init(n1); fmpz_set_si(n1, n); a = _fmpz_vec_init(len); b = _fmpz_vec_init(len); c = _fmpz_vec_init(len); _fmpz_vec_randtest(a, state, len, 200); _fmpz_vec_randtest(b, state, len, 200); _fmpz_vec_set(c, b, len); _fmpz_vec_scalar_submul_fmpz(b, a, len, n1); _fmpz_vec_scalar_submul_si(c, a, len, n); result = (_fmpz_vec_equal(c, b, len)); if (!result) { printf("FAIL:\n"); _fmpz_vec_print(c, len), printf("\n\n"); _fmpz_vec_print(b, len), printf("\n\n"); abort(); } fmpz_clear(n1); _fmpz_vec_clear(a, len); _fmpz_vec_clear(b, len); _fmpz_vec_clear(c, len); } /* Compute a different way */ for (i = 0; i < 10000; i++) { fmpz *a, *b, *c, *d; long len = n_randint(state, 100); fmpz_t n1; fmpz_init(n1); fmpz_randtest(n1, state, 200); a = _fmpz_vec_init(len); b = _fmpz_vec_init(len); c = _fmpz_vec_init(len); d = _fmpz_vec_init(len); _fmpz_vec_randtest(a, state, len, 200); _fmpz_vec_randtest(b, state, len, 200); _fmpz_vec_set(c, b, len); _fmpz_vec_scalar_submul_fmpz(b, a, len, n1); _fmpz_vec_scalar_mul_fmpz(d, a, len, n1); _fmpz_vec_sub(c, c, d, len); result = (_fmpz_vec_equal(c, b, len)); if (!result) { printf("FAIL:\n"); _fmpz_vec_print(c, len), printf("\n\n"); _fmpz_vec_print(b, len), printf("\n\n"); abort(); } fmpz_clear(n1); _fmpz_vec_clear(a, len); _fmpz_vec_clear(b, len); _fmpz_vec_clear(c, len); _fmpz_vec_clear(d, len); } flint_randclear(state); _fmpz_cleanup(); printf("PASS\n"); return 0; }
/*
   Divide-and-conquer division with remainder: computes a quotient Q and
   a remainder R of (A, lenA) by (B, lenB), dispatching on how lenA
   compares with 2 lenB - 1.

     lenA <  2 lenB - 1   reduce to a balanced (2 n1 - 1) by n1 division
                          of the top parts, with n1 = lenA - lenB + 1;
                          uses a temporary buffer W.
     lenA >  2 lenB - 1   divide the top 2 lenB - 1 coefficients first,
                          then recurse on the lower lenA - lenB
                          coefficients.  No buffer is allocated here:
                          scratch space is taken from R beyond index lenA
                          and A is written through a cast (see the XXX
                          note in that branch).
     lenA == 2 lenB - 1   one call to the balanced recursive routine.

   NOTE(review): presumably lenA >= lenB >= 1 is required (n1 would
   otherwise be non-positive) -- confirm against the callers.
 */
static void
__fmpz_poly_divrem_divconquer(fmpz * Q, fmpz * R,
                              const fmpz * A, long lenA,
                              const fmpz * B, long lenB)
{
    if (lenA < 2 * lenB - 1)
    {
        /*
           Convert unbalanced division into a 2 n1 - 1 by n1 division
         */

        const long n1 = lenA - lenB + 1;
        const long n2 = lenB - n1;

        /* p1: top 2 n1 - 1 coefficients of A; d1/d2: top n1 / low n2 of B */
        const fmpz * p1 = A + n2;
        const fmpz * d1 = B + n2;
        const fmpz * d2 = B;

        /* W: (2 n1 - 1) entries of workspace followed by lenB - 1 for d2q1 */
        fmpz * W = _fmpz_vec_init((2 * n1 - 1) + lenB - 1);

        /* d1q1 is produced directly inside R, at offset n2 */
        fmpz * d1q1 = R + n2;
        fmpz * d2q1 = W + (2 * n1 - 1);

        _fmpz_poly_divrem_divconquer_recursive(Q, d1q1, W, p1, d1, n1);

        /*
           Compute d2q1 = Q d2, of length lenB - 1
           (the longer operand is passed first to _fmpz_poly_mul)
         */

        if (n1 >= n2)
            _fmpz_poly_mul(d2q1, Q, n1, d2, n2);
        else
            _fmpz_poly_mul(d2q1, d2, n2, Q, n1);

        /*
           Assemble BQ in R.  d1q1 already sits at R + n2, i.e. it is
           shifted by n2 (the earlier wording said x^n1, which does not
           match d1q1 = R + n2).  The low n2 coefficients of d2q1 are
           swapped into R and its remaining n1 - 1 coefficients are added
           on top of d1q1; then compute R = A - BQ over lenA coefficients.
         */

        _fmpz_vec_swap(R, d2q1, n2);
        _fmpz_vec_add(R + n2, R + n2, d2q1 + n2, n1 - 1);
        _fmpz_vec_sub(R, A, R, lenA);

        _fmpz_vec_clear(W, (2 * n1 - 1) + lenB - 1);
    }
    else if (lenA > 2 * lenB - 1)
    {
        /*
           We shift A right until it is of length 2 lenB - 1, call this p1
         */

        const long shift = lenA - 2 * lenB + 1;
        const fmpz * p1 = A + shift;

        fmpz * q1   = Q + shift;
        fmpz * q2   = Q;

        /* Workspace and d1q1 live in R itself, past the first lenA entries */
        fmpz * W    = R + lenA;
        fmpz * d1q1 = W + (2 * lenB - 1);

        /*
           XXX: In this case, we expect R to be of length
           lenA + 2 * (2 * lenB - 1) and A to be modifiable
         */

        /*
           Set q1 to p1 div B, a 2 lenB - 1 by lenB division, so q1 ends up
           being of length lenB; set d1q1 = d1 * q1 of length 2 lenB - 1
         */

        _fmpz_poly_divrem_divconquer_recursive(q1, d1q1, W, p1, B, lenB);

        /*
           We have dq1 = d1 * q1 * x^shift, of length lenA

           Compute R = A - dq1; the first lenB coeffs represent remainder
           terms (zero if division is exact), leaving lenA - lenB
           significant terms which we use in the division

           Note that this overwrites lenB - 1 coefficients of A, starting
           at index shift, through the cast below.
         */

        _fmpz_vec_sub((fmpz *) A + shift, A + shift, d1q1, lenB - 1);

        /*
           Compute q2 = trunc(R) div B; it is a smaller division than the
           original since len(trunc(R)) = lenA - lenB
         */

        __fmpz_poly_divrem_divconquer(q2, R, A, lenA - lenB, B, lenB);

        /* Fill the top lenB coefficients of R from A and the top of d1q1 */
        _fmpz_vec_sub(R + lenA - lenB,
                      A + lenA - lenB, d1q1 + lenB - 1, lenB);

        /*
           We have Q = q1 * x^shift + q2; Q has length lenB + shift;
           note q2 has length shift since the above division is
           lenA - lenB by lenB

           We've also written the remainder in place
         */
    }
    else  /* lenA = 2 * lenB - 1 */
    {
        /* Balanced case: temporary workspace of lenA entries */
        fmpz * W = _fmpz_vec_init(lenA);

        _fmpz_poly_divrem_divconquer_recursive(Q, R, W, A, B, lenB);

        /* R holds the product from the call above; replace it by A - R */
        _fmpz_vec_sub(R, A, R, lenA);

        _fmpz_vec_clear(W, lenA);
    }
}