void _fmpz_poly_sqrlow_KS(fmpz * res, const fmpz * poly, long len, long n) { int neg; long bits, limbs, loglen, sign = 0; mp_limb_t *arr_in, *arr_out; FMPZ_VEC_NORM(poly, len); if (len == 0) { _fmpz_vec_zero(res, n); return; } neg = (fmpz_sgn(poly + len - 1) > 0) ? 0 : -1; if (n > 2 * len - 1) { _fmpz_vec_zero(res + 2 * len - 1, n - (2 * len - 1)); n = 2 * len - 1; } bits = _fmpz_vec_max_bits(poly, len); if (bits < 0) { sign = 1; bits = - bits; } loglen = FLINT_BIT_COUNT(len); bits = 2 * bits + loglen + sign; limbs = (bits * len - 1) / FLINT_BITS + 1; arr_in = flint_calloc(limbs, sizeof(mp_limb_t)); arr_out = flint_malloc((2 * limbs) * sizeof(mp_limb_t)); _fmpz_poly_bit_pack(arr_in, poly, len, bits, neg); mpn_sqr(arr_out, arr_in, limbs); if (sign) _fmpz_poly_bit_unpack(res, n, arr_out, bits, 0); else _fmpz_poly_bit_unpack_unsigned(res, n, arr_out, bits); flint_free(arr_in); flint_free(arr_out); }
void fmpz_poly_bit_unpack(fmpz_poly_t poly, const fmpz_t f, mp_bitcnt_t bit_size) { slong len; mpz_t tmp; int negate, borrow; if (bit_size == 0 || fmpz_is_zero(f)) { fmpz_poly_zero(poly); return; } /* Round up */ len = (fmpz_bits(f) + bit_size - 1) / bit_size; negate = (fmpz_sgn(f) < 0) ? -1 : 0; mpz_init2(tmp, bit_size*len); /* TODO: avoid all this wastefulness */ flint_mpn_zero(tmp->_mp_d, tmp->_mp_alloc); fmpz_get_mpz(tmp, f); fmpz_poly_fit_length(poly, len + 1); borrow = _fmpz_poly_bit_unpack(poly->coeffs, len, tmp->_mp_d, bit_size, negate); if (borrow) { fmpz_set_si(poly->coeffs + len, negate ? WORD(-1) : WORD(1)); _fmpz_poly_set_length(poly, len + 1); } else { _fmpz_poly_set_length(poly, len); _fmpz_poly_normalise(poly); } mpz_clear(tmp); }
void _fmpz_poly_mullow_KS(fmpz * res, const fmpz * poly1, long len1, const fmpz * poly2, long len2, long n) { int neg1, neg2; long limbs1, limbs2, loglen; long bits1, bits2, bits; mp_limb_t *arr1, *arr2, *arr3; long sign = 0; FMPZ_VEC_NORM(poly1, len1); FMPZ_VEC_NORM(poly2, len2); if (!len1 | !len2) { _fmpz_vec_zero(res, n); return; } neg1 = (fmpz_sgn(poly1 + len1 - 1) > 0) ? 0 : -1; neg2 = (fmpz_sgn(poly2 + len2 - 1) > 0) ? 0 : -1; if (n > len1 + len2 - 1) { _fmpz_vec_zero(res + len1 + len2 - 1, n - (len1 + len2 - 1)); n = len1 + len2 - 1; } bits1 = _fmpz_vec_max_bits(poly1, len1); if (bits1 < 0) { sign = 1; bits1 = -bits1; } if (poly1 != poly2) { bits2 = _fmpz_vec_max_bits(poly2, len2); if (bits2 < 0) { sign = 1; bits2 = -bits2; } } else bits2 = bits1; loglen = FLINT_BIT_COUNT(FLINT_MIN(len1, len2)); bits = bits1 + bits2 + loglen + sign; limbs1 = (bits * len1 - 1) / FLINT_BITS + 1; limbs2 = (bits * len2 - 1) / FLINT_BITS + 1; if (poly1 == poly2) { arr1 = (mp_ptr) flint_calloc(limbs1, sizeof(mp_limb_t)); arr2 = arr1; _fmpz_poly_bit_pack(arr1, poly1, len1, bits, neg1); } else { arr1 = (mp_ptr) flint_calloc(limbs1 + limbs2, sizeof(mp_limb_t)); arr2 = arr1 + limbs1; _fmpz_poly_bit_pack(arr1, poly1, len1, bits, neg1); _fmpz_poly_bit_pack(arr2, poly2, len2, bits, neg2); } arr3 = (mp_ptr) flint_malloc((limbs1 + limbs2) * sizeof(mp_limb_t)); if (limbs1 == limbs2) mpn_mul_n(arr3, arr1, arr2, limbs1); else if (limbs1 > limbs2) mpn_mul(arr3, arr1, limbs1, arr2, limbs2); else mpn_mul(arr3, arr2, limbs2, arr1, limbs1); if (sign) _fmpz_poly_bit_unpack(res, n, arr3, bits, neg1 ^ neg2); else _fmpz_poly_bit_unpack_unsigned(res, n, arr3, bits); flint_free(arr1); flint_free(arr3); }
/* Assumes len1 != 0 != len2 */ int _fmpz_poly_gcd_heuristic(fmpz * res, const fmpz * poly1, long len1, const fmpz * poly2, long len2) { ulong bits1, bits2, max_bits, pack_bits, bound_bits, bits_G, bits_Q; ulong limbs1, limbs2, limbsg, pack_limbs, qlimbs; ulong log_glen, log_length; long sign1, sign2, glen, qlen; fmpz_t ac, bc, d, gc; fmpz * A, * B, * G, * Q, * t; mp_ptr array1, array2, arrayg, q, temp; int divides; fmpz_init(ac); fmpz_init(bc); fmpz_init(d); /* compute gcd of content of poly1 and poly2 */ _fmpz_poly_content(ac, poly1, len1); _fmpz_poly_content(bc, poly2, len2); fmpz_gcd(d, ac, bc); /* special case, one of the polys is a constant */ if (len2 == 1) /* if len1 == 1 then so does len2 */ { fmpz_set(res, d); fmpz_clear(ac); fmpz_clear(bc); fmpz_clear(d); return 1; } /* divide poly1 and poly2 by their content */ A = _fmpz_vec_init(len1); B = _fmpz_vec_init(len2); _fmpz_vec_scalar_divexact_fmpz(A, poly1, len1, ac); _fmpz_vec_scalar_divexact_fmpz(B, poly2, len2, bc); fmpz_clear(ac); fmpz_clear(bc); /* special case, one of the polys is length 2 */ if (len2 == 2) /* if len1 == 2 then so does len2 */ { Q = _fmpz_vec_init(len1 - len2 + 1); if (_fmpz_poly_divides(Q, A, len1, B, 2)) { _fmpz_vec_scalar_mul_fmpz(res, B, 2, d); if (fmpz_sgn(res + 1) < 0) _fmpz_vec_neg(res, res, 2); } else { fmpz_set(res, d); fmpz_zero(res + 1); } fmpz_clear(d); _fmpz_vec_clear(A, len1); _fmpz_vec_clear(B, len2); _fmpz_vec_clear(Q, len1 - len2 + 1); return 1; } /* Determine how many bits (pack_bits) to pack into. The bound bound_bits ensures that if G | A and G | B with G primitive then G is the gcd of A and B. The bound is taken from http://arxiv.org/abs/cs/0206032v1 */ bits1 = FLINT_ABS(_fmpz_vec_max_bits(A, len1)); bits2 = FLINT_ABS(_fmpz_vec_max_bits(B, len2)); max_bits = FLINT_MAX(bits1, bits2); bound_bits = FLINT_MIN(bits1, bits2) + 6; pack_bits = FLINT_MAX(bound_bits, max_bits); /* need to pack original polys */ pack_limbs = (pack_bits - 1)/FLINT_BITS + 1; if (pack_bits >= 32) /* pack into multiples of limbs if >= 32 bits */ pack_bits = FLINT_BITS*pack_limbs; /* allocate space to pack into */ limbs1 = (pack_bits*len1 - 1)/FLINT_BITS + 1; limbs2 = (pack_bits*len2 - 1)/FLINT_BITS + 1; array1 = flint_calloc(limbs1, sizeof(mp_limb_t)); array2 = flint_calloc(limbs2, sizeof(mp_limb_t)); arrayg = flint_calloc(limbs2, sizeof(mp_limb_t)); /* pack first poly and normalise */ sign1 = (long) fmpz_sgn(A + len1 - 1); _fmpz_poly_bit_pack(array1, A, len1, pack_bits, sign1); while (array1[limbs1 - 1] == 0) limbs1--; /* pack second poly and normalise */ sign2 = (long) fmpz_sgn(B + len2 - 1); _fmpz_poly_bit_pack(array2, B, len2, pack_bits, sign2); while (array2[limbs2 - 1] == 0) limbs2--; /* compute integer GCD */ limbsg = mpn_gcd_full(arrayg, array1, limbs1, array2, limbs2); /* Make space for unpacked gcd. May have one extra coeff due to 1 0 -x being packed as 0 -1 -x. */ glen = FLINT_MIN((limbsg*FLINT_BITS)/pack_bits + 1, len2); G = _fmpz_vec_init(glen); /* unpack gcd */ _fmpz_poly_bit_unpack(G, glen, arrayg, pack_bits, 0); while (G[glen - 1] == 0) glen--; /* divide by any content */ fmpz_init(gc); _fmpz_poly_content(gc, G, glen); if (!fmpz_is_one(gc)) limbsg = mpn_tdiv_q_fmpz_inplace(arrayg, limbsg, gc); /* make space for quotient and remainder of first poly by gcd */ qlimbs = limbs1 - limbsg + 1; qlen = FLINT_MIN(len1, (qlimbs*FLINT_BITS)/pack_bits + 1); qlimbs = (qlen*pack_bits - 1)/FLINT_BITS + 1; q = flint_calloc(qlimbs, sizeof(mp_limb_t)); temp = flint_malloc(limbsg*sizeof(mp_limb_t)); divides = 0; if (mpn_divides(q, array1, limbs1, arrayg, limbsg, temp)) { /* unpack quotient of first poly by gcd */ Q = _fmpz_vec_init(len1); t = _fmpz_vec_init(len1 + glen); _fmpz_poly_bit_unpack(Q, qlen, q, pack_bits, 0); while (Q[qlen - 1] == 0) qlen--; /* divide by content */ _fmpz_vec_scalar_divexact_fmpz(G, G, glen, gc); /* check if we really need to multiply out to check for exact quotient */ bits_G = FLINT_ABS(_fmpz_vec_max_bits(G, glen)); bits_Q = FLINT_ABS(_fmpz_vec_max_bits(Q, qlen)); log_glen = FLINT_BIT_COUNT(glen); log_length = FLINT_MIN(log_glen, FLINT_BIT_COUNT(qlen)); divides = (bits_G + bits_Q + log_length < pack_bits); if (!divides) /* need to multiply out to check exact quotient */ divides = multiplies_out(A, len1, Q, qlen, G, glen, sign1, t); if (divides) /* quotient really was exact */ { mpn_zero(q, qlimbs); if (mpn_divides(q, array2, limbs2, arrayg, limbsg, temp)) { /* unpack quotient of second poly by gcd */ qlimbs = limbs2 - limbsg + 1; qlen = FLINT_MIN(len2, (qlimbs*FLINT_BITS - 1)/pack_bits + 1); _fmpz_poly_bit_unpack(Q, qlen, q, pack_bits, 0); while (Q[qlen - 1] == 0) qlen--; /* check if we really need to multiply out to check for exact quotient */ bits_Q = FLINT_ABS(_fmpz_vec_max_bits(Q, qlen)); log_length = FLINT_MIN(log_glen, FLINT_BIT_COUNT(qlen)); divides = (bits_G + bits_Q + log_length < pack_bits); if (!divides) /* we need to multiply out */ divides = multiplies_out(B, len2, Q, qlen, G, glen, sign1, t); } } _fmpz_vec_clear(t, len1 + glen); _fmpz_vec_clear(Q, len1); } flint_free(q); flint_free(temp); flint_free(arrayg); flint_free(array1); flint_free(array2); fmpz_clear(gc); _fmpz_vec_clear(A, len1); _fmpz_vec_clear(B, len2); /* we found the gcd, so multiply by content */ if (divides) { _fmpz_vec_zero(res + glen, len2 - glen); _fmpz_vec_scalar_mul_fmpz(res, G, glen, d); } fmpz_clear(d); _fmpz_vec_clear(G, glen); return divides; }