/* Set ROOT to the truncated integer square root of OP.

   A negative OP invokes SQRT_OF_NEGATIVE; a zero OP gives a zero ROOT.
   ROOT and OP may use the same limb storage: in that case the operand is
   either read from the old storage (when ROOT has to grow) or copied to
   temporary space, so mpn_sqrtrem never sees identical root and operand
   pointers.  */
void
mpz_sqrt (mpz_ptr root, mpz_srcptr op)
{
  mp_size_t op_size, root_size;
  mp_ptr root_ptr, op_ptr;
  mp_ptr free_me = NULL;
  mp_size_t free_me_size = 0;
  TMP_DECL (marker);

  op_size = op->_mp_size;
  if (op_size <= 0)
    {
      if (op_size < 0)
        SQRT_OF_NEGATIVE;
      SIZ(root) = 0;
      return;
    }

  /* Mark only once we know temporary space may be needed, so that every
     TMP_MARK is paired with the TMP_FREE at the end of the function.  */
  TMP_MARK (marker);

  /* The size of the root is accurate after this simple calculation.  */
  root_size = (op_size + 1) / 2;

  root_ptr = root->_mp_d;
  op_ptr = op->_mp_d;

  if (root->_mp_alloc < root_size)
    {
      /* ROOT is too small: give it fresh storage.  If the old storage is
         also the operand it must stay alive until the root has been
         computed, so its release is deferred via FREE_ME.  */
      if (root_ptr == op_ptr)
        {
          free_me = root_ptr;
          free_me_size = root->_mp_alloc;
        }
      else
        (*__gmp_free_func) (root_ptr, root->_mp_alloc * BYTES_PER_MP_LIMB);

      root->_mp_alloc = root_size;
      root_ptr = (mp_ptr) (*__gmp_allocate_func) (root_size * BYTES_PER_MP_LIMB);
      root->_mp_d = root_ptr;
    }
  else
    {
      /* Make OP not overlap with ROOT.  */
      if (root_ptr == op_ptr)
        {
          /* ROOT and OP are identical.  Allocate temporary space for OP
             and copy the operand there (ROOT_PTR still addresses the
             operand limbs at this point).  */
          op_ptr = (mp_ptr) TMP_ALLOC (op_size * BYTES_PER_MP_LIMB);
          MPN_COPY (op_ptr, root_ptr, op_size);
        }
    }

  /* NULL remainder pointer: only the root is wanted.  */
  mpn_sqrtrem (root_ptr, NULL, op_ptr, op_size);

  root->_mp_size = root_size;

  if (free_me != NULL)
    (*__gmp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);
  TMP_FREE (marker);
}
/* Integer square root of the L1-limb number at N1, written to the L2-limb
   area at RES.

   A zero top limb of the operand is dropped before calling mpn_sqrtrem,
   whose operand must have a non-zero most significant limb; the top limb
   of RES is cleared in that case because the root of the shortened
   operand may not reach it.  Only one zero limb is stripped, so callers
   are presumably expected to pass operands with at most one leading zero
   limb (NOTE(review): confirm against the caller).

   A zero (or empty) operand yields an all-zero result without calling
   mpn_sqrtrem, which is not defined for a zero-length operand.  */
void gmp_wrap_isqrt(mp_limb_t *res, mp_size_t l2, mp_limb_t *n1, mp_size_t l1)
{
    mp_size_t i;

    if (l1 > 0 && n1[l1 - 1] == 0) {
        l1--;
        if (l2 > 0)
            res[l2 - 1] = 0;
    }

    if (l1 <= 0) {
        /* sqrt(0) == 0 */
        for (i = 0; i < l2; i++)
            res[i] = 0;
        return;
    }

    /* Null remainder pointer: only the root is wanted. */
    mpn_sqrtrem(res, (mp_limb_t *) 0, n1, l1);
}
/* Integer square root of the bignum N1L, stored into the bignum RESL.

   Both arguments are converted with TO_WORD_PTR and their limb counts are
   taken with BIGNUM_LENGTH.  A zero top limb of the operand is dropped
   before calling mpn_sqrtrem, whose operand must have a non-zero most
   significant limb; the top limb of the result is cleared in that case
   because the root of the shortened operand may not reach it.

   A zero (or empty) operand yields an all-zero result without calling
   mpn_sqrtrem, which is not defined for a zero-length operand.  */
void gmp_sb_isqrt(char * n1l, char * resl)
{
    WORD_PTR_TYPE n1 = TO_WORD_PTR(n1l);
    mp_size_t l1 = BIGNUM_LENGTH(n1);
    WORD_PTR_TYPE res = TO_WORD_PTR(resl);
    unsigned long l2 = BIGNUM_LENGTH(res);
    unsigned long i;

    if (l1 > 0 && n1[l1 - 1] == 0) {
        l1--;
        if (l2 > 0)
            res[l2 - 1] = 0;
    }

    if (l1 <= 0) {
        /* sqrt(0) == 0 */
        for (i = 0; i < l2; i++)
            res[i] = 0;
        return;
    }

    /* Null remainder pointer: only the root is wanted. */
    mpn_sqrtrem(res, 0, n1, l1);
}
/* Set R to the square root of U, producing R's precision in limbs.

   The operand is laid out in a temporary area of 2*prec limbs (one limb
   less when U's exponent is odd), either truncated to its most
   significant limbs or zero-padded at the low end, so that mpn_sqrtrem
   delivers exactly prec root limbs.  A negative U invokes
   SQRT_OF_NEGATIVE; a zero U gives a zero R.  */
void
mpf_sqrt (mpf_ptr r, mpf_srcptr u)
{
  mp_size_t src_limbs, want, work_limbs;
  mp_srcptr src;
  mp_ptr work;
  mp_exp_t src_exp, odd;
  TMP_DECL;

  src_limbs = u->_mp_size;
  if (src_limbs < 0)
    SQRT_OF_NEGATIVE;
  if (src_limbs <= 0)
    {
      r->_mp_size = 0;
      r->_mp_exp = 0;
      return;
    }

  TMP_MARK;

  /* Read everything needed from U before touching R's fields.  */
  src = u->_mp_d;
  src_exp = u->_mp_exp;
  want = r->_mp_prec;

  odd = (src_exp & 1);
  work_limbs = 2 * want - odd;

  r->_mp_size = want;
  r->_mp_exp = (src_exp + odd) / 2;	/* ceil(src_exp/2) */

  /* The root has ceil(work_limbs/2) limbs, which is the wanted count.  */
  ASSERT ((work_limbs + 1) / 2 == want);

  work = (mp_ptr) TMP_ALLOC (work_limbs * BYTES_PER_MP_LIMB);

  if (src_limbs > work_limbs)
    {
      /* Operand is longer than needed: keep its high limbs only.  */
      MPN_COPY (work, src + (src_limbs - work_limbs), work_limbs);
    }
  else
    {
      /* Operand is short: place it at the top, zeros below.  */
      mp_size_t pad = work_limbs - src_limbs;
      MPN_ZERO (work, pad);
      MPN_COPY (work + pad, src, src_limbs);
    }

  mpn_sqrtrem (r->_mp_d, NULL, work, work_limbs);

  TMP_FREE;
}
/*
   Sets sqrt to the integer square root of n and rem to the remainder.

   n[0] is read as the signed limb count of n and the limbs follow from
   index 1; sqrt and rem are written in the same layout.  A zero n sets
   both outputs to zero.  A negative n prints a diagnostic and aborts.

   The limb areas are handed straight to mpn_sqrtrem, so sqrt must not
   share storage with n or rem (NOTE(review): aliasing is not handled
   here; confirm callers never pass overlapping arguments).
*/
void fmpz_sqrtrem(fmpz_t sqrt, fmpz_t rem, fmpz_t n)
{
   long size = n[0];

   if (size < 0L)
   {
      printf("Cannot take the square root of a negative number!\n");
      /* abort() need not flush stdio buffers, so push the message out
         explicitly or it can be lost when stdout is a file or a pipe. */
      fflush(stdout);
      abort();
   }

   if (!size)
   {
      fmpz_set_ui(sqrt, 0L);
      fmpz_set_ui(rem, 0L);
      return;
   }

   /* mpn_sqrtrem returns the number of limbs of the remainder. */
   rem[0] = mpn_sqrtrem(sqrt+1, rem+1, n+1, size);
   /* The root of a size-limb number has ceil(size/2) limbs. */
   sqrt[0] = (size+1)/2;
}
/* Set R to the square root of the unsigned long U, producing R's
   precision in limbs.

   U is placed above 2*prec-2 zero limbs in a temporary area so that the
   integer square root computed by mpn_sqrtrem carries the wanted number
   of limbs.  When U2 is non-zero an unsigned long may need two limbs, and
   the high part is stored (and counted only if it is non-zero).  */
void
mpf_sqrt_ui (mpf_ptr r, unsigned long int u)
{
  mp_size_t want, pad, work_limbs;
  mp_ptr work;
  TMP_DECL;

  if (UNLIKELY (u == 0))
    {
      r->_mp_size = 0;
      r->_mp_exp = 0;
      return;
    }

  TMP_MARK;

  want = r->_mp_prec;
  pad = 2 * want - 2;
  work_limbs = pad + 1 + U2;

  work = TMP_ALLOC_LIMBS (work_limbs);

  MPN_ZERO (work, pad);
  work[pad] = u & GMP_NUMB_MASK;

#if U2
  {
    mp_limb_t high = u >> GMP_NUMB_BITS;
    work[pad + 1] = high;
    /* mpn_sqrtrem wants a non-zero top limb: drop an empty high part.  */
    if (high == 0)
      work_limbs--;
  }
#endif

  mpn_sqrtrem (r->_mp_d, NULL, work, work_limbs);

  r->_mp_exp = 1;
  r->_mp_size = want;

  TMP_FREE;
}
mp_limb_t _ll_factor_SQUFOF(mp_limb_t n_hi, mp_limb_t n_lo, ulong max_iters) { mp_limb_t n[2]; mp_limb_t sqrt[2]; mp_limb_t rem[2]; mp_size_t num, sqroot, p, q; mp_limb_t l, l2, iq, pnext; mp_limb_t qarr[50]; mp_limb_t qupto, qlast, t, r = 0; ulong i, j; n[0] = n_lo; n[1] = n_hi; if (n_hi) num = mpn_sqrtrem(sqrt, rem, n, 2); else num = ((sqrt[0] = n_sqrtrem(rem, n_lo)) != 0UL); sqroot = sqrt[0]; p = sqroot; q = rem[0]; if ((q == 0) || (num == 0)) { return sqroot; } l = 1 + 2*n_sqrt(2*p); l2 = l/2; qupto = 0; qlast = 1; for (i = 0; i < max_iters; i++) { iq = (sqroot + p)/q; pnext = iq*q - p; if (q <= l) { if ((q & 1UL) == 0UL) { qarr[qupto] = q/2; qupto++; if (qupto >= 50UL) return 0UL; } else if (q <= l2) { qarr[qupto] = q; qupto++; if (qupto >= 50UL) return 0UL; } } t = qlast + iq*(p - pnext); qlast = q; q = t; p = pnext; if ((i & 1) == 1) continue; if (!n_is_square(q)) continue; r = n_sqrt(q); if (qupto == 0UL) break; for (j = 0; j < qupto; j++) if (r == qarr[j]) goto cont; break; cont: ; if (r == 1UL) return 0UL; } if (i == max_iters) return 0UL; /* taken too long, give up */ qlast = r; p = p + r*((sqroot - p)/r); umul_ppmm(rem[1], rem[0], p, p); sub_ddmmss(sqrt[1], sqrt[0], n[1], n[0], rem[1], rem[0]); if (sqrt[1]) { int norm; count_leading_zeros(norm, qlast); udiv_qrnnd(q, rem[0], (sqrt[1] << norm) + r_shift(sqrt[0], FLINT_BITS - norm), sqrt[0] << norm, qlast << norm); rem[0] >>= norm; } else {
/* Set R to the square root of U rounded in direction RND_MODE and return
   the ternary value (0 if exact, negative if R is below the exact root,
   positive if above), after range checking by mpfr_check_range.

   Singular inputs: NaN gives NaN, +/-0 gives a zero of the same sign,
   +Inf gives +Inf, and -Inf or any negative number gives NaN.

   Otherwise the most significant limbs of U (shifted right by one bit
   when the exponent is odd) are copied into a temporary of twice the
   wanted root size, an integer square root is taken, and the result is
   rounded using the discarded low bits of the root, the sqrtrem remainder
   and the truncated part of the input as sticky information.  */
int
mpfr_sqrt (mpfr_ptr r, mpfr_srcptr u, mpfr_rnd_t rnd_mode)
{
  mp_size_t rsize; /* number of limbs of r (plus 1 if exact limb multiple) */
  mp_size_t rrsize;
  mp_size_t usize; /* number of limbs of u */
  mp_size_t tsize; /* number of limbs of the sqrtrem remainder */
  mp_size_t k;
  mp_size_t l;
  mpfr_limb_ptr rp, rp0;
  mpfr_limb_ptr up;
  mpfr_limb_ptr sp;
  mp_limb_t sticky0; /* truncated part of input */
  mp_limb_t sticky1; /* truncated part of rp[0] */
  mp_limb_t sticky;
  int odd_exp;
  int sh; /* number of extra bits in rp[0] */
  int inexact; /* return ternary flag */
  mpfr_exp_t expr;
  MPFR_TMP_DECL(marker);

  MPFR_LOG_FUNC
    (("x[%Pu]=%.*Rg rnd=%d", mpfr_get_prec (u), mpfr_log_prec, u, rnd_mode),
     ("y[%Pu]=%.*Rg inexact=%d", mpfr_get_prec (r), mpfr_log_prec, r, inexact));

  if (MPFR_UNLIKELY(MPFR_IS_SINGULAR(u)))
    {
      if (MPFR_IS_NAN(u))
        {
          MPFR_SET_NAN(r);
          MPFR_RET_NAN;
        }
      else if (MPFR_IS_ZERO(u))
        {
          /* 0+ or 0- */
          MPFR_SET_SAME_SIGN(r, u);
          MPFR_SET_ZERO(r);
          MPFR_RET(0); /* zero is exact */
        }
      else
        {
          MPFR_ASSERTD(MPFR_IS_INF(u));
          /* sqrt(-Inf) = NAN */
          if (MPFR_IS_NEG(u))
            {
              MPFR_SET_NAN(r);
              MPFR_RET_NAN;
            }
          MPFR_SET_POS(r);
          MPFR_SET_INF(r);
          MPFR_RET(0);
        }
    }
  if (MPFR_UNLIKELY(MPFR_IS_NEG(u)))
    {
      MPFR_SET_NAN(r);
      MPFR_RET_NAN;
    }
  MPFR_SET_POS(r);

  MPFR_TMP_MARK (marker);
  MPFR_UNSIGNED_MINUS_MODULO(sh,MPFR_PREC(r));
  if (sh == 0 && rnd_mode == MPFR_RNDN)
    sh = GMP_NUMB_BITS; /* ugly case */
  rsize = MPFR_LIMB_SIZE(r) + (sh == GMP_NUMB_BITS);
  /* rsize is the number of limbs of r + 1 if exact limb multiple and rounding
     to nearest, this is the number of wanted limbs for the square root */
  rrsize = rsize + rsize;
  usize = MPFR_LIMB_SIZE(u); /* number of limbs of u */
  rp0 = MPFR_MANT(r);
  /* In the "ugly case" the root needs one limb more than r can hold, so
     it is computed in temporary space and copied to rp0 afterwards.  */
  rp = (sh < GMP_NUMB_BITS) ? rp0 : MPFR_TMP_LIMBS_ALLOC (rsize);
  up = MPFR_MANT(u);
  sticky0 = MPFR_LIMB_ZERO; /* truncated part of input */
  sticky1 = MPFR_LIMB_ZERO; /* truncated part of rp[0] */
  odd_exp = (unsigned int) MPFR_GET_EXP (u) & 1;
  inexact = -1; /* return ternary flag */

  sp = MPFR_TMP_LIMBS_ALLOC (rrsize);

  /* copy the most significant limbs of u to {sp, rrsize} */
  if (MPFR_LIKELY(usize <= rrsize)) /* in case r and u have the same precision,
                                       we have indeed rrsize = 2 * usize */
    {
      k = rrsize - usize;
      if (MPFR_LIKELY(k))
        MPN_ZERO (sp, k);
      if (odd_exp)
        {
          if (MPFR_LIKELY(k))
            sp[k - 1] = mpn_rshift (sp + k, up, usize, 1);
          else
            sticky0 = mpn_rshift (sp, up, usize, 1);
        }
      else
        MPN_COPY (sp + rrsize - usize, up, usize);
    }
  else /* usize > rrsize: truncate the input */
    {
      k = usize - rrsize;
      if (odd_exp)
        sticky0 = mpn_rshift (sp, up + k, rrsize, 1);
      else
        MPN_COPY (sp, up + k, rrsize);
      l = k;
      while (sticky0 == MPFR_LIMB_ZERO && l != 0)
        sticky0 = up[--l];
    }

  /* sticky0 is non-zero iff the truncated part of the input is non-zero */

  /* mpn_rootrem with NULL 2nd argument is faster than mpn_sqrtrem, thus use
     it if available and if the user asked to use GMP internal functions */
#if defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_ROOTREM)
  tsize = __gmpn_rootrem (rp, NULL, sp, rrsize, 2);
#else
  tsize = mpn_sqrtrem (rp, NULL, sp, rrsize);
#endif

  /* a return value of zero in mpn_sqrtrem indicates a perfect square */
  sticky = sticky0 || tsize != 0;

  /* truncate low bits of rp[0] */
  sticky1 = rp[0] & ((sh < GMP_NUMB_BITS) ? MPFR_LIMB_MASK(sh)
                     : ~MPFR_LIMB_ZERO);
  rp[0] -= sticky1;

  sticky = sticky || sticky1;

  expr = (MPFR_GET_EXP(u) + odd_exp) / 2;  /* exact */

  if (rnd_mode == MPFR_RNDZ || rnd_mode == MPFR_RNDD ||
      sticky == MPFR_LIMB_ZERO)
    {
      inexact = (sticky == MPFR_LIMB_ZERO) ? 0 : -1;
      goto truncate;
    }
  else if (rnd_mode == MPFR_RNDN)
    {
      /* if sh < GMP_NUMB_BITS, the round bit is bit (sh-1) of sticky1
         and the sticky bit is formed by the low sh-1 bits from
         sticky1, together with the sqrtrem remainder and sticky0. */
      if (sh < GMP_NUMB_BITS)
        {
          if (sticky1 & (MPFR_LIMB_ONE << (sh - 1)))
            { /* round bit is set */
              if (sticky1 == (MPFR_LIMB_ONE << (sh - 1)) && tsize == 0
                  && sticky0 == 0)
                goto even_rule;
              else
                goto add_one_ulp;
            }
          else /* round bit is zero */
            goto truncate; /* with the default inexact=-1 */
        }
      else /* sh = GMP_NUMB_BITS: the round bit is the most significant bit
              of rp[0], and the remaining GMP_NUMB_BITS-1 bits contribute to
              the sticky bit */
        {
          if (sticky1 & MPFR_LIMB_HIGHBIT)
            { /* round bit is set */
              if (sticky1 == MPFR_LIMB_HIGHBIT && tsize == 0 && sticky0 == 0)
                goto even_rule;
              else
                goto add_one_ulp;
            }
          else /* round bit is zero */
            goto truncate; /* with the default inexact=-1 */
        }
    }
  else /* remaining rounding modes round the positive result upwards
          (e.g. MPFR_RNDU); necessarily sticky <> 0, thus add 1 ulp */
    goto add_one_ulp;

 even_rule: /* has to set inexact */
  if (sh < GMP_NUMB_BITS)
    inexact = (rp[0] & (MPFR_LIMB_ONE << sh)) ? 1 : -1;
  else
    inexact = (rp[1] & MPFR_LIMB_ONE) ? 1 : -1;
  if (inexact == -1)
    goto truncate;
  /* else go through add_one_ulp */

 add_one_ulp:
  inexact = 1; /* always here */
  if (sh == GMP_NUMB_BITS)
    {
      /* Skip the extra low limb of the temporary root.  */
      rp ++;
      rsize --;
      sh = 0;
    }
  if (mpn_add_1 (rp0, rp, rsize, MPFR_LIMB_ONE << sh))
    {
      /* Carry out: the sum stored at rp0 wrapped to zero, so the result
         is the next power of two.  The high bit must be set in rp0, the
         destination of the addition; rp may be the temporary buffer.  */
      expr ++;
      rp0[rsize - 1] = MPFR_LIMB_HIGHBIT;
    }
  goto end;

 truncate: /* inexact = 0 or -1 */
  if (sh == GMP_NUMB_BITS)
    MPN_COPY (rp0, rp + 1, rsize - 1);

 end:
  MPFR_ASSERTN (expr >= MPFR_EMIN_MIN && expr <= MPFR_EMAX_MAX);
  MPFR_EXP (r) = expr;
  MPFR_TMP_FREE(marker);

  return mpfr_check_range (r, inexact, rnd_mode);
}
/* Set ROOT to the truncated integer square root of OP and REM to the
   remainder OP - ROOT*ROOT.

   A negative OP invokes SQRT_OF_NEGATIVE; a zero OP gives zero ROOT and
   REM.  ROOT and OP may use the same limb storage: in that case the
   operand is either read from the old storage (when ROOT has to grow) or
   copied to temporary space before mpn_sqrtrem is called.  */
void
mpz_sqrtrem (mpz_ptr root, mpz_ptr rem, mpz_srcptr op)
{
  mp_size_t op_size, root_size, rem_size;
  mp_ptr root_ptr, op_ptr;
  mp_ptr free_me = NULL;
  mp_size_t free_me_size = 0;
  TMP_DECL;

  op_size = op->_mp_size;
  if (op_size <= 0)
    {
      if (op_size < 0)
        SQRT_OF_NEGATIVE;
      SIZ(root) = 0;
      SIZ(rem) = 0;
      return;
    }

  /* Mark only once we know temporary space may be needed, so that every
     TMP_MARK is paired with the TMP_FREE at the end of the function.  */
  TMP_MARK;

  /* mpn_sqrtrem is given OP_SIZE limbs of remainder space.  */
  if (rem->_mp_alloc < op_size)
    _mpz_realloc (rem, op_size);

  /* The size of the root is accurate after this simple calculation.  */
  root_size = (op_size + 1) / 2;

  /* Fetch the limb pointers only now: the realloc above may have moved
     the storage when REM is the same variable as ROOT or OP.  */
  root_ptr = root->_mp_d;
  op_ptr = op->_mp_d;

  if (root->_mp_alloc < root_size)
    {
      /* ROOT is too small: give it fresh storage.  If the old storage is
         also the operand it must stay alive until the root has been
         computed, so its release is deferred via FREE_ME.  */
      if (root_ptr == op_ptr)
        {
          free_me = root_ptr;
          free_me_size = root->_mp_alloc;
        }
      else
        (*__gmp_free_func) (root_ptr, root->_mp_alloc * BYTES_PER_MP_LIMB);

      root->_mp_alloc = root_size;
      root_ptr = (mp_ptr) (*__gmp_allocate_func) (root_size * BYTES_PER_MP_LIMB);
      root->_mp_d = root_ptr;
    }
  else
    {
      /* Make OP not overlap with ROOT.  */
      if (root_ptr == op_ptr)
        {
          /* ROOT and OP are identical.  Allocate temporary space for OP
             and copy the operand there (ROOT_PTR still addresses the
             operand limbs at this point).  */
          op_ptr = (mp_ptr) TMP_ALLOC (op_size * BYTES_PER_MP_LIMB);
          MPN_COPY (op_ptr, root_ptr, op_size);
        }
    }

  rem_size = mpn_sqrtrem (root_ptr, rem->_mp_d, op_ptr, op_size);

  root->_mp_size = root_size;

  /* Write remainder size last, to enable us to define this function to
     give only the square root remainder, if the user calls it with
     ROOT == REM.  */
  rem->_mp_size = rem_size;

  if (free_me != NULL)
    (*__gmp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);
  TMP_FREE;
}
// Compute the integer square root of the limb array x into the front of
// result and return the trimmed view of the limbs that were written.
// The root occupies (1 + x.size()) / 2 limbs; the remainder is not requested.
static inline RawArray<mp_limb_t> sqrt_helper(RawArray<mp_limb_t> result, RawArray<const mp_limb_t> x) {
  const auto operand_limbs = x.size();
  const auto root = result.slice(0, (1 + operand_limbs) / 2);
  mpn_sqrtrem(root.data(), nullptr, x.data(), operand_limbs);
  return trim(root);
}