/*
    Sets res to the low coefficients of poly * poly, keeping at most n
    of them.

    An empty poly or n == 0 yields the zero polynomial. The square of a
    length-len polynomial has at most 2 * len - 1 coefficients, so the
    requested length is clamped to that value before the underscore
    kernel is invoked. res may alias poly; in that case the result is
    built in a temporary and swapped in.
*/
void
fmpz_poly_sqrlow(fmpz_poly_t res, const fmpz_poly_t poly, long n)
{
    const long len = poly->length;
    long trunc;

    if (len == 0 || n == 0)
    {
        fmpz_poly_zero(res);
        return;
    }

    if (res != poly)
    {
        /* never ask the kernel for more coefficients than the square has */
        trunc = FLINT_MIN(2 * len - 1, n);

        fmpz_poly_fit_length(res, trunc);
        _fmpz_poly_sqrlow(res->coeffs, poly->coeffs, len, trunc);
        _fmpz_poly_set_length(res, trunc);
        _fmpz_poly_normalise(res);
    }
    else
    {
        /* aliased input and output: compute into scratch, then swap */
        fmpz_poly_t tmp;

        fmpz_poly_init2(tmp, n);
        fmpz_poly_sqrlow(tmp, poly, n);
        fmpz_poly_swap(res, tmp);
        fmpz_poly_clear(tmp);
    }
}
/*
    Adds block-wise integer products of x and y into the first n entries
    of z.

    xblocks / yblocks hold the starting offsets of consecutive blocks and
    are terminated by an entry equal to xlen / ylen respectively. Block i
    of x consists of the integers xz[xblocks[i]] .. xz[xblocks[i+1] - 1],
    and xexps + i is passed as its exponent (likewise for y). zz is
    scratch space receiving each integer block product before it is added
    to z at precision prec.

    When squaring is nonzero, the diagonal products (block i with itself)
    are handled by a dedicated squaring pass, and the cross-term pass only
    visits pairs with j > i, forwarding `squaring` as the last argument of
    _fmpz_add2_fast.
    NOTE(review): presumably _fmpz_add2_fast(r, a, b, c) sets r = a + b + c,
    so a value of 1 doubles the off-diagonal products -- confirm against
    its definition.
*/
static __inline__ void
_arb_poly_addmullow_block(arb_ptr z, fmpz * zz,
    const fmpz * xz, const fmpz * xexps, const slong * xblocks, slong xlen,
    const fmpz * yz, const fmpz * yexps, const slong * yblocks, slong ylen,
    slong n, slong prec, int squaring)
{
    slong bx, by, t;
    slong xstart, ystart, xsize, ysize, outlen, shift;
    fmpz_t e;

    fmpz_init(e);

    if (squaring)
    {
        /* diagonal terms: each block multiplied by itself */
        for (bx = 0; xblocks[bx] != xlen; bx++)
        {
            xstart = xblocks[bx];
            shift = 2 * xstart;

            /* skip products that start at or beyond the truncation point */
            if (shift < n)
            {
                xsize = xblocks[bx + 1] - xstart;
                outlen = FLINT_MIN(2 * xsize - 1, n - shift);
                xsize = FLINT_MIN(xsize, outlen);

                _fmpz_poly_sqrlow(zz, xz + xstart, xsize, outlen);
                _fmpz_add2_fast(e, xexps + bx, xexps + bx, 0);

                for (t = 0; t < outlen; t++)
                    arb_add_fmpz_2exp(z + shift + t, z + shift + t,
                        zz + t, e, prec);
            }
        }
    }

    /* cross terms: every (x block, y block) pair, or only j > i if squaring */
    for (bx = 0; xblocks[bx] != xlen; bx++)
    {
        xstart = xblocks[bx];

        for (by = (squaring ? bx + 1 : 0); yblocks[by] != ylen; by++)
        {
            ystart = yblocks[by];
            shift = xstart + ystart;

            if (shift >= n)
                continue;

            xsize = xblocks[bx + 1] - xstart;
            ysize = yblocks[by + 1] - ystart;

            /* truncate the product and both operands to what fits below n */
            outlen = FLINT_MIN(xsize + ysize - 1, n - shift);
            xsize = FLINT_MIN(xsize, outlen);
            ysize = FLINT_MIN(ysize, outlen);

            /* the longer operand is passed first to the integer kernel */
            if (xsize < ysize)
                _fmpz_poly_mullow(zz, yz + ystart, ysize,
                    xz + xstart, xsize, outlen);
            else
                _fmpz_poly_mullow(zz, xz + xstart, xsize,
                    yz + ystart, ysize, outlen);

            _fmpz_add2_fast(e, xexps + bx, yexps + by, squaring);

            for (t = 0; t < outlen; t++)
                arb_add_fmpz_2exp(z + shift + t, z + shift + t,
                    zz + t, e, prec);
        }
    }

    fmpz_clear(e);
}
/*
    Sets (C, n) to the product of (A, lenA) and (B, lenB) truncated to
    length n, by converting both inputs to integer polynomials sharing a
    common power-of-two exponent, multiplying those exactly, and adding a
    uniform error bound to the radius of every output coefficient.

    Both input lengths are first clamped to n. When A and B are the same
    pointer with equal (clamped) lengths, the squaring kernel is used.
    If any input radius is infinite or NaN, every output coefficient is
    set to indeterminate and no multiplication is performed.
*/
void
_fmprb_poly_mullow_ztrunc(fmprb_ptr C,
    fmprb_srcptr A, long lenA,
    fmprb_srcptr B, long lenB, long n, long prec)
{
    fmpz * amant, * bmant, * cmant;
    fmpz_t aexp, bexp, cexp;
    fmpr_t arad, brad, amax, bmax, bound;
    long k;
    int same;

    lenA = FLINT_MIN(lenA, n);
    lenB = FLINT_MIN(lenB, n);

    same = (A == B) && (lenA == lenB);

    /* TODO: make the code below work correctly without this workaround */
    if (_fmprb_vec_rad_has_inf_nan(A, lenA) ||
        (!same && _fmprb_vec_rad_has_inf_nan(B, lenB)))
    {
        _fmprb_vec_indeterminate(C, n);
        return;
    }

    amant = _fmpz_vec_init(lenA);
    bmant = _fmpz_vec_init(lenB);
    cmant = _fmpz_vec_init(n);

    fmpz_init(aexp);
    fmpz_init(bexp);
    fmpz_init(cexp);

    fmpr_init(arad);
    fmpr_init(brad);
    fmpr_init(amax);
    fmpr_init(bmax);
    fmpr_init(bound);

    /* integer mantissas, shared exponent and error term for A */
    _fmprb_poly_get_fmpz_poly_2exp(arad, aexp, amant, A, lenA, prec);

    /* largest coefficient magnitude of A, scaled by its exponent */
    _fmpr_fmpz_vec_max_norm(amax, amant, lenA, FMPRB_RAD_PREC);
    fmpr_mul_2exp_fmpz(amax, amax, aexp);

    if (!same)
    {
        _fmprb_poly_get_fmpz_poly_2exp(brad, bexp, bmant, B, lenB, prec);

        /* exact integer product; the longer operand goes first */
        if (lenA < lenB)
            _fmpz_poly_mullow(cmant, bmant, lenB, amant, lenA, n);
        else
            _fmpz_poly_mullow(cmant, amant, lenA, bmant, lenB, n);

        fmpz_add(cexp, aexp, bexp);

        _fmpr_fmpz_vec_max_norm(bmax, bmant, lenB, FMPRB_RAD_PREC);
        fmpr_mul_2exp_fmpz(bmax, bmax, bexp);

        /* (A+r)(B+s) = AB + As + Br + rs: bound the last three terms */
        fmpr_mul(bound, arad, brad, FMPRB_RAD_PREC, FMPR_RND_UP);
        fmpr_addmul(bound, amax, brad, FMPRB_RAD_PREC, FMPR_RND_UP);
        fmpr_addmul(bound, bmax, arad, FMPRB_RAD_PREC, FMPR_RND_UP);
    }
    else
    {
        _fmpz_poly_sqrlow(cmant, amant, lenA, n);
        fmpz_add(cexp, aexp, aexp);

        /* (A+r)^2 = A^2 + 2Ar + r^2: bound the last two terms */
        fmpr_mul(bound, amax, arad, FMPRB_RAD_PREC, FMPR_RND_UP);
        fmpr_mul_2exp_si(bound, bound, 1);
        fmpr_addmul(bound, arad, arad, FMPRB_RAD_PREC, FMPR_RND_UP);
    }

    for (k = 0; k < n; k++)
    {
        fmprb_set_round_fmpz_2exp(C + k, cmant + k, cexp, prec);

        /* coefficient k collects at most (k+1) error terms */
        /* TODO: make this tight */
        fmpr_addmul_ui(fmprb_radref(C + k), bound, k + 1,
            FMPRB_RAD_PREC, FMPR_RND_UP);
    }

    fmpr_clear(bound);
    fmpr_clear(bmax);
    fmpr_clear(amax);
    fmpr_clear(brad);
    fmpr_clear(arad);

    fmpz_clear(cexp);
    fmpz_clear(bexp);
    fmpz_clear(aexp);

    _fmpz_vec_clear(cmant, n);
    _fmpz_vec_clear(bmant, lenB);
    _fmpz_vec_clear(amant, lenA);
}