/* Adds to z the (length-n truncated) product of two polynomials given in
   block-decomposed fixed-point form: xz holds integer coefficient blocks,
   xexps[i] the common 2-exponent of block i, and xblocks the start indices
   of the blocks (terminated by an entry equal to xlen); likewise for y.
   zz is scratch space for one integer block product. If squaring is
   nonzero, x and y are the same polynomial and only i <= j block pairs are
   processed, with the diagonal handled by an integer squaring and the
   off-diagonal products counted twice (via the flag passed to
   _fmpz_add2_fast -- presumably a +1 exponent shift; TODO confirm against
   that helper's definition). Results are accumulated into z rounded to
   prec bits. */
static __inline__ void
_arb_poly_addmullow_block(arb_ptr z, fmpz * zz,
    const fmpz * xz, const fmpz * xexps, const slong * xblocks, slong xlen,
    const fmpz * yz, const fmpz * yexps, const slong * yblocks, slong ylen,
    slong n, slong prec, int squaring)
{
    slong i, j, k, xp, yp, xl, yl, bn;
    fmpz_t zexp;

    fmpz_init(zexp);

    if (squaring)
    {
        /* Diagonal contributions: square each block of x once. */
        for (i = 0; (xp = xblocks[i]) != xlen; i++)
        {
            /* Block product starts at degree 2*xp; skip if truncated away. */
            if (2 * xp >= n)
                continue;

            xl = xblocks[i + 1] - xp;               /* block length */
            bn = FLINT_MIN(2 * xl - 1, n - 2 * xp); /* output length after truncation */
            xl = FLINT_MIN(xl, bn);                 /* input terms beyond bn cannot contribute */

            _fmpz_poly_sqrlow(zz, xz + xp, xl, bn);
            /* zexp = xexps[i] + xexps[i]; last arg 0 = no extra doubling */
            _fmpz_add2_fast(zexp, xexps + i, xexps + i, 0);

            for (k = 0; k < bn; k++)
                arb_add_fmpz_2exp(z + 2 * xp + k, z + 2 * xp + k, zz + k, zexp, prec);
        }
    }

    /* Cross products of blocks; when squaring, only pairs with j > i
       (the 'squaring' flag below accounts for the factor of 2). */
    for (i = 0; (xp = xblocks[i]) != xlen; i++)
    {
        for (j = squaring ? i + 1 : 0; (yp = yblocks[j]) != ylen; j++)
        {
            if (xp + yp >= n)
                continue;

            xl = xblocks[i + 1] - xp;
            yl = yblocks[j + 1] - yp;
            bn = FLINT_MIN(xl + yl - 1, n - xp - yp);
            xl = FLINT_MIN(xl, bn);
            yl = FLINT_MIN(yl, bn);

            /* _fmpz_poly_mullow requires the first operand to be the longer one. */
            if (xl >= yl)
                _fmpz_poly_mullow(zz, xz + xp, xl, yz + yp, yl, bn);
            else
                _fmpz_poly_mullow(zz, yz + yp, yl, xz + xp, xl, bn);

            _fmpz_add2_fast(zexp, xexps + i, yexps + j, squaring);

            for (k = 0; k < bn; k++)
                arb_add_fmpz_2exp(z + xp + yp + k, z + xp + yp + k, zz + k, zexp, prec);
        }
    }

    fmpz_clear(zexp);
}
/* Sets Qinv to the compositional inverse (series reversion) of Q,
   truncated to length n, via the Lagrange inversion formula:
   [x^k] Qinv = (1/k) [x^(k-1)] (x/Q)^k.
   Assumes Q[0] = 0 and that the result has integer coefficients
   (in particular Q[1] must be a unit). */
void
_fmpz_poly_revert_series_lagrange(fmpz * Qinv, const fmpz * Q, slong n)
{
    fmpz *Rinv, *Rpow, *Rprod, *swap_ptr;
    slong k;

    /* Length <= 2: Q = a*x reverts to itself (coefficient-wise copy). */
    if (n <= 2)
    {
        _fmpz_vec_set(Qinv, Q, n);
        return;
    }

    Rinv = _fmpz_vec_init(n - 1);
    Rpow = _fmpz_vec_init(n - 1);
    Rprod = _fmpz_vec_init(n - 1);

    fmpz_zero(Qinv);
    fmpz_set(Qinv + 1, Q + 1);

    /* Rinv = (Q/x)^(-1) mod x^(n-1); Rpow tracks (Q/x)^(-k). */
    _fmpz_poly_inv_series(Rinv, Q + 1, n - 1);
    _fmpz_vec_set(Rpow, Rinv, n - 1);

    for (k = 2; k < n; k++)
    {
        /* Rprod = (Q/x)^(-k) mod x^(n-1). */
        _fmpz_poly_mullow(Rprod, Rpow, n - 1, Rinv, n - 1, n - 1);
        /* Exact by Lagrange inversion: k divides the extracted coefficient. */
        fmpz_divexact_ui(Qinv + k, Rprod + k - 1, k);

        /* Rotate buffers so Rpow holds the latest power. */
        swap_ptr = Rpow;
        Rpow = Rprod;
        Rprod = swap_ptr;
    }

    _fmpz_vec_clear(Rinv, n - 1);
    _fmpz_vec_clear(Rpow, n - 1);
    _fmpz_vec_clear(Rprod, n - 1);
}
/* Sets Qinv to the compositional inverse (series reversion) of Q,
   truncated to length n, using Newton iteration
   Qinv <- Qinv - (Q(Qinv) - x) / Q'(Qinv),
   bootstrapped at small length by the Lagrange method. Assumes
   Q[0] = 0 and Q[1] is a unit (so the reversion has integer
   coefficients). */
void
_fmpz_poly_revert_series_newton(fmpz * Qinv, const fmpz * Q, long n)
{
    /* Length <= 2: Q = a*x is its own reversion (up to copying). */
    if (n <= 2)
    {
        _fmpz_vec_set(Qinv, Q, n);
        return;
    }
    else
    {
        long *a, i, k;
        fmpz *T, *U, *V;

        T = _fmpz_vec_init(n);
        U = _fmpz_vec_init(n);
        V = _fmpz_vec_init(n);

        k = n;

        /* i = ceil(log2(k)): upper bound on the number of halving steps. */
        for (i = 1; (1L << i) < k; i++);
        a = (long *) flint_malloc(i * sizeof(long));

        /* Build the descending precision schedule a[0] = n > a[1] > ...
           by repeated halving until below the Lagrange cutoff. */
        a[i = 0] = k;
        while (k >= FLINT_REVERSE_NEWTON_CUTOFF)
            a[++i] = (k = (k + 1) / 2);

        /* Base case at the smallest precision. */
        _fmpz_poly_revert_series_lagrange(Qinv, Q, k);
        _fmpz_vec_zero(Qinv + k, n - k);

        /* Newton steps, doubling the precision each iteration. */
        for (i--; i >= 0; i--)
        {
            k = a[i];

            /* T = Q(Qinv) mod x^k; at a correct half-precision iterate,
               T = x + O(x^(ceil(k/2)+1)). */
            _fmpz_poly_compose_series(T, Q, k, Qinv, k, k);
            _fmpz_poly_derivative(U, T, k);
            fmpz_zero(U + k - 1);
            /* Subtract x from T (its coefficient is 1 here) so the
               quotient below is the Newton residual. */
            fmpz_zero(T + 1);
            _fmpz_poly_div_series(V, T, U, k);
            _fmpz_poly_derivative(T, Qinv, k);
            _fmpz_poly_mullow(U, V, k, T, k, k);
            _fmpz_vec_sub(Qinv, Qinv, U, k);
        }

        flint_free(a);

        _fmpz_vec_clear(T, n);
        _fmpz_vec_clear(U, n);
        _fmpz_vec_clear(V, n);
    }
}
/* Adds to the radii of z an upper bound for the product of the
   block-decomposed error polynomials x and y (same block layout as in
   _arb_poly_addmullow_block: integer blocks xz with per-block 2-exponents
   xexps and start indices xblocks terminated by xlen). For pairs of short
   blocks, a fast double-precision classical multiplication is used; the
   doubles are presumably the block coefficients pre-scaled by
   2^(-DOUBLE_BLOCK_SHIFT) (TODO confirm against the caller), and the
   downward rounding of the double sum is compensated by multiplying with
   DOUBLE_ROUNDING_FACTOR before converting to a mag_t (which rounds up).
   Longer block pairs fall back to exact integer multiplication into the
   scratch buffer zz. */
static __inline__ void
_arb_poly_addmullow_rad(arb_ptr z, fmpz * zz,
    const fmpz * xz, const double * xdbl, const fmpz * xexps,
    const slong * xblocks, slong xlen,
    const fmpz * yz, const double * ydbl, const fmpz * yexps,
    const slong * yblocks, slong ylen, slong n)
{
    slong i, j, k, ii, xp, yp, xl, yl, bn;
    fmpz_t zexp;
    mag_t t;

    fmpz_init(zexp);
    mag_init(t);

    for (i = 0; (xp = xblocks[i]) != xlen; i++)
    {
        for (j = 0; (yp = yblocks[j]) != ylen; j++)
        {
            /* Product of these blocks starts at degree xp + yp. */
            if (xp + yp >= n)
                continue;

            xl = xblocks[i + 1] - xp;               /* block lengths */
            yl = yblocks[j + 1] - yp;
            bn = FLINT_MIN(xl + yl - 1, n - xp - yp); /* truncated output length */
            xl = FLINT_MIN(xl, bn);
            yl = FLINT_MIN(yl, bn);

            fmpz_add_inline(zexp, xexps + i, yexps + j);

            if (xl > 1 && yl > 1 &&
                (xl < DOUBLE_BLOCK_MAX_LENGTH || yl < DOUBLE_BLOCK_MAX_LENGTH))
            {
                /* Undo the 2^(-DOUBLE_BLOCK_SHIFT) scaling of both factors. */
                fmpz_add_ui(zexp, zexp, 2 * DOUBLE_BLOCK_SHIFT);

                for (k = 0; k < bn; k++)
                {
                    /* Classical multiplication (may round down!) */
                    double ss = 0.0;

                    for (ii = FLINT_MAX(0, k - yl + 1);
                         ii <= FLINT_MIN(xl - 1, k); ii++)
                    {
                        ss += xdbl[xp + ii] * ydbl[yp + k - ii];
                    }

                    /* Compensate for rounding error */
                    ss *= DOUBLE_ROUNDING_FACTOR;

                    mag_set_d_2exp_fmpz(t, ss, zexp);
                    mag_add(arb_radref(z + xp + yp + k),
                        arb_radref(z + xp + yp + k), t);
                }
            }
            else
            {
                /* Exact integer path; longer operand must come first. */
                if (xl >= yl)
                    _fmpz_poly_mullow(zz, xz + xp, xl, yz + yp, yl, bn);
                else
                    _fmpz_poly_mullow(zz, yz + yp, yl, xz + xp, xl, bn);

                for (k = 0; k < bn; k++)
                {
                    mag_set_fmpz_2exp_fmpz(t, zz + k, zexp);
                    mag_add(arb_radref(z + xp + yp + k),
                        arb_radref(z + xp + yp + k), t);
                }
            }
        }
    }

    fmpz_clear(zexp);
    mag_clear(t);
}
/* Sets {C, n} to the product of {A, lenA} and {B, lenB}, truncated to
   length n and rounded to prec bits. Strategy: convert each input to a
   single integer polynomial times a power of two plus a uniform error
   bound, multiply the integer polynomials exactly, then propagate a
   per-coefficient error bound derived from the cross terms. Detects
   squaring (A == B) and uses the faster _fmpz_poly_sqrlow in that case. */
void
_fmprb_poly_mullow_ztrunc(fmprb_ptr C, fmprb_srcptr A, long lenA,
    fmprb_srcptr B, long lenB, long n, long prec)
{
    fmpz * Acoeffs, * Bcoeffs, * Ccoeffs;
    fmpz_t Aexp, Bexp, Cexp;
    fmpr_t Aerr, Berr, Anorm, Bnorm, err;
    long i;
    int squaring;

    /* Coefficients of degree >= n cannot contribute to the result. */
    lenA = FLINT_MIN(lenA, n);
    lenB = FLINT_MIN(lenB, n);

    squaring = (A == B) && (lenA == lenB);

    /* TODO: make the code below work correctly without this workaround */
    if (_fmprb_vec_rad_has_inf_nan(A, lenA) ||
        (!squaring && _fmprb_vec_rad_has_inf_nan(B, lenB)))
    {
        _fmprb_vec_indeterminate(C, n);
        return;
    }

    fmpz_init(Aexp);
    fmpz_init(Bexp);
    fmpz_init(Cexp);

    Acoeffs = _fmpz_vec_init(lenA);
    Bcoeffs = _fmpz_vec_init(lenB);
    Ccoeffs = _fmpz_vec_init(n);

    fmpr_init(Aerr);
    fmpr_init(Berr);
    fmpr_init(Anorm);
    fmpr_init(Bnorm);
    fmpr_init(err);

    /* A ~= 2^Aexp * Acoeffs with uniform coefficient error <= Aerr. */
    _fmprb_poly_get_fmpz_poly_2exp(Aerr, Aexp, Acoeffs, A, lenA, prec);

    if (squaring)
    {
        _fmpz_poly_sqrlow(Ccoeffs, Acoeffs, lenA, n);
        fmpz_add(Cexp, Aexp, Aexp);

        /* cross-multiply error bounds: (A+r)(B+s) = A^2 + 2Ar + r^2 */
        _fmpr_fmpz_vec_max_norm(Anorm, Acoeffs, lenA, FMPRB_RAD_PREC);
        fmpr_mul_2exp_fmpz(Anorm, Anorm, Aexp);

        /* err = 2*|A|*r + r^2, each term rounded up. */
        fmpr_mul(err, Anorm, Aerr, FMPRB_RAD_PREC, FMPR_RND_UP);
        fmpr_mul_2exp_si(err, err, 1);
        fmpr_addmul(err, Aerr, Aerr, FMPRB_RAD_PREC, FMPR_RND_UP);
    }
    else
    {
        _fmprb_poly_get_fmpz_poly_2exp(Berr, Bexp, Bcoeffs, B, lenB, prec);

        /* main multiplication */
        if (lenA >= lenB)
            _fmpz_poly_mullow(Ccoeffs, Acoeffs, lenA, Bcoeffs, lenB, n);
        else
            _fmpz_poly_mullow(Ccoeffs, Bcoeffs, lenB, Acoeffs, lenA, n);

        fmpz_add(Cexp, Aexp, Bexp);

        /* cross-multiply error bounds: (A+r)(B+s) = AB + As + Br + rs */
        _fmpr_fmpz_vec_max_norm(Anorm, Acoeffs, lenA, FMPRB_RAD_PREC);
        fmpr_mul_2exp_fmpz(Anorm, Anorm, Aexp);

        _fmpr_fmpz_vec_max_norm(Bnorm, Bcoeffs, lenB, FMPRB_RAD_PREC);
        fmpr_mul_2exp_fmpz(Bnorm, Bnorm, Bexp);

        /* err = r*s + |A|*s + |B|*r, each term rounded up. */
        fmpr_mul(err, Aerr, Berr, FMPRB_RAD_PREC, FMPR_RND_UP);
        fmpr_addmul(err, Anorm, Berr, FMPRB_RAD_PREC, FMPR_RND_UP);
        fmpr_addmul(err, Bnorm, Aerr, FMPRB_RAD_PREC, FMPR_RND_UP);
    }

    for (i = 0; i < n; i++)
    {
        fmprb_set_round_fmpz_2exp(C + i, Ccoeffs + i, Cexp, prec);

        /* there are at most (i+1) error terms for coefficient i */
        /* TODO: make this tight */
        fmpr_addmul_ui(fmprb_radref(C + i), err, i + 1,
            FMPRB_RAD_PREC, FMPR_RND_UP);
    }

    fmpr_clear(Aerr);
    fmpr_clear(Berr);
    fmpr_clear(Anorm);
    fmpr_clear(Bnorm);
    fmpr_clear(err);

    _fmpz_vec_clear(Acoeffs, lenA);
    _fmpz_vec_clear(Bcoeffs, lenB);
    _fmpz_vec_clear(Ccoeffs, n);

    fmpz_clear(Aexp);
    fmpz_clear(Bexp);
    fmpz_clear(Cexp);
}
/* Sets (Qinv, Qinvden) to the power series inverse of (Q, Qden),
   truncated to length n, using Newton iteration with a polynomial
   division base case. The rational polynomial is represented as an
   fmpz coefficient vector over a common denominator; the result is
   canonicalised. Assumes Q[0] is nonzero. Note: n is modified locally
   as the working precision. */
void
_fmpq_poly_inv_series_newton(fmpz * Qinv, fmpz_t Qinvden,
    const fmpz * Q, const fmpz_t Qden, long n)
{
    if (n == 1)
    {
        /* Inverse of the constant Q[0]/Qden is Qden/Q[0]; keep the
           denominator positive by negating both parts if needed. */
        if (fmpz_sgn(Q) > 0)
        {
            fmpz_set(Qinv, Qden);
            fmpz_set(Qinvden, Q);
        }
        else
        {
            fmpz_neg(Qinv, Qden);
            fmpz_neg(Qinvden, Q);
        }
    }
    else
    {
        /* Workspace sized for both the Newton steps (length n) and the
           base-case scratch layout below (3 * cutoff). */
        const long alloc = FLINT_MAX(n, 3 * FMPQ_POLY_INV_NEWTON_CUTOFF);

        long *a, i, m;
        fmpz *W, *Wden;

        W = _fmpz_vec_init(alloc + 1);
        Wden = W + alloc;  /* single fmpz at the end of W for the scratch denominator */

        /* i = ceil(log2(n)): upper bound on the number of halving steps. */
        for (i = 1; (1L << i) < n; i++) ;

        /* Build the descending precision schedule a[0] = n > a[1] > ...
           by repeated halving until below the division cutoff. */
        a = (long *) flint_malloc(i * sizeof(long));
        a[i = 0] = n;
        while (n >= FMPQ_POLY_INV_NEWTON_CUTOFF)
            a[++i] = (n = (n + 1) / 2);

        /* Base case: compute the inverse to length n by dividing
           x^(2n-2) by the reversal of Q and reversing back. */
        {
            fmpz *rev = W + 2 * FMPQ_POLY_INV_NEWTON_CUTOFF;

            _fmpz_poly_reverse(rev, Q, n, n);
            _fmpz_vec_zero(W, 2*n - 2);
            fmpz_one(W + (2*n - 2));   /* W = x^(2n-2) */
            fmpz_one(Wden);
            _fmpq_poly_div(Qinv, Qinvden, W, Wden, 2*n - 1, rev, Qden, n);
            _fmpq_poly_canonicalise(Qinv, Qinvden, n);

            _fmpz_poly_reverse(Qinv, Qinv, n, n);
        }

        /* Newton steps: lift from precision m to n = a[i] via
           Qinv <- Qinv - Qinv * (Q * Qinv - 1), computed over the
           common denominators. */
        for (i--; i >= 0; i--)
        {
            m = n;      /* previous precision */
            n = a[i];   /* target precision */

            /* W = Q * Qinv mod x^n; terms below x^m agree with 1 up to
               the denominator, so only W + m onward is used below. */
            _fmpz_poly_mullow(W, Q, n, Qinv, m, n);
            fmpz_mul(Wden, Qden, Qinvden);

            /* High part of the update: Qinv * (upper half of W). */
            _fmpz_poly_mullow(Qinv + m, Qinv, m, W + m, n - m, n - m);
            fmpz_mul(Qinvden, Qinvden, Wden);
            /* Rescale the low part onto the new common denominator. */
            _fmpz_vec_scalar_mul_fmpz(Qinv, Qinv, m, Wden);
            _fmpz_vec_neg(Qinv + m, Qinv + m, n - m);

            _fmpq_poly_canonicalise(Qinv, Qinvden, n);
        }

        _fmpz_vec_clear(W, alloc + 1);
        flint_free(a);
    }
}