/* CFB-8 encryption: one cipher call per byte of data.

   BUFFER is a sliding window of 2 * block_size bytes.  It starts out
   holding the IV in its lower half; every ciphertext byte produced is
   appended behind the current window, and the window start then
   advances by one byte.  Once the window has moved a full block, the
   upper half is copied down to the lower half, so shifting costs one
   memcpy per block rather than one per byte.

   On return IV holds the final block_size bytes of the window, which
   lets a later call continue the stream.  */
void
cfb8_encrypt(const void *ctx, nettle_cipher_func *f,
             size_t block_size, uint8_t *iv,
             size_t length, uint8_t *dst,
             const uint8_t *src)
{
  TMP_DECL(buffer, uint8_t, NETTLE_MAX_CIPHER_BLOCK_SIZE * 2);
  TMP_DECL(outbuf, uint8_t, NETTLE_MAX_CIPHER_BLOCK_SIZE);
  TMP_ALLOC(buffer, block_size * 2);
  TMP_ALLOC(outbuf, block_size);
  uint8_t offset = 0;   /* start of the current window inside BUFFER */

  memcpy(buffer, iv, block_size);

  for (; length > 0; length--, offset++)
    {
      uint8_t cipher_byte;

      if (offset == block_size)
        {
          /* Window reached the upper half: move it back down. */
          memcpy(buffer, buffer + block_size, block_size);
          offset = 0;
        }

      f(ctx, block_size, outbuf, buffer + offset);

      /* Only the first byte of the cipher output is used. */
      cipher_byte = *src++ ^ outbuf[0];
      *dst++ = cipher_byte;
      buffer[offset + block_size] = cipher_byte;
    }

  memcpy(iv, buffer + offset, block_size);
}
/* Set QUOT and REM to the quotient and remainder of DIVIDEND / DIVISOR,
   with the quotient rounded towards minus infinity.

   The work is done by mpz_tdiv_qr, followed by a correction step when
   the operand signs differ and the remainder is non-zero.  */
void
mpz_fdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
{
  mp_size_t divisor_size = divisor->_mp_size;
  mp_size_t sign_xor;
  mpz_t divisor_copy;           /* N.B.: lives until function returns! */
  TMP_DECL (marker);

  TMP_MARK (marker);

  /* The correction step below needs the original divisor, so take a
     temporary copy if an output operand would overwrite it.  */
  if (quot == divisor || rem == divisor)
    {
      MPZ_TMP_INIT (divisor_copy, ABS (divisor_size));
      mpz_set (divisor_copy, divisor);
      divisor = divisor_copy;
    }

  /* Negative exactly when the operands have opposite signs.  Must be
     read before the division, since QUOT or REM may alias DIVIDEND.  */
  sign_xor = dividend->_mp_size ^ divisor_size;

  mpz_tdiv_qr (quot, rem, dividend, divisor);

  if (sign_xor < 0 && rem->_mp_size != 0)
    {
      mpz_sub_ui (quot, quot, 1L);
      mpz_add (rem, rem, divisor);
    }

  TMP_FREE (marker);
}
/* Multiply the SIZE-limb numbers {up,size} and {vp,size}, storing the
   product at PRODP.  When both operands are the same pointer the
   squaring routines are used.  Below KARATSUBA_THRESHOLD the basecase
   routines run without workspace; otherwise 2*SIZE limbs of temporary
   space are handed to impn_sqr_n / impn_mul_n.

   This should be made into an inline function in gmp.h.  */
void
mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
{
  int squaring = (up == vp);
  TMP_DECL (marker);

  TMP_MARK (marker);

  if (size < KARATSUBA_THRESHOLD)
    {
      if (squaring)
        impn_sqr_n_basecase (prodp, up, size);
      else
        impn_mul_n_basecase (prodp, up, vp, size);
    }
  else
    {
      mp_ptr workspace;

      workspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
      if (squaring)
        impn_sqr_n (prodp, up, size, workspace);
      else
        impn_mul_n (prodp, up, vp, size, workspace);
    }

  TMP_FREE (marker);
}
/* Set REM to DIVIDEND mod DIVISOR.

   mpz_tdiv_r computes a preliminary remainder.  If that remainder is
   non-zero and DIVIDEND is negative, DIVISOR is added (when it is
   positive) or subtracted (when it is negative) to adjust it.  */
void
mpz_mod (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
{
  mp_size_t divisor_size = divisor->_mp_size;
  mpz_t divisor_copy;           /* N.B.: lives until function returns! */
  TMP_DECL (marker);

  TMP_MARK (marker);

  /* The adjustment needs the original divisor; copy it to temporary
     space if REM is the same variable.  */
  if (rem == divisor)
    {
      MPZ_TMP_INIT (divisor_copy, ABS (divisor_size));
      mpz_set (divisor_copy, divisor);
      divisor = divisor_copy;
    }

  mpz_tdiv_r (rem, dividend, divisor);

  if (rem->_mp_size != 0 && dividend->_mp_size < 0)
    {
      if (divisor->_mp_size < 0)
        mpz_sub (rem, rem, divisor);
      else
        mpz_add (rem, rem, divisor);
    }

  TMP_FREE (marker);
}
/* Set ROOT to the integer square root of OP, as computed by
   mpn_sqrtrem with the remainder discarded.

   A negative OP triggers SQRT_OF_NEGATIVE; a zero OP yields zero.
   ROOT and OP may be the same variable.  */
void
mpz_sqrt (mpz_ptr root, mpz_srcptr op)
{
  mp_size_t op_size, root_size;
  mp_ptr root_ptr, op_ptr;
  mp_ptr old_limbs = NULL;      /* ROOT's previous limbs, if still needed as input */
  mp_size_t old_alloc;
  TMP_DECL (marker);

  TMP_MARK (marker);

  op_size = op->_mp_size;
  if (op_size <= 0)
    {
      if (op_size < 0)
        SQRT_OF_NEGATIVE;
      SIZ(root) = 0;
      return;
    }

  /* The size of the root is accurate after this simple calculation.  */
  root_size = (op_size + 1) / 2;

  root_ptr = root->_mp_d;
  op_ptr = op->_mp_d;

  if (root->_mp_alloc < root_size)
    {
      /* ROOT is too small and gets a fresh limb array.  If its old
         array is also the input, keep it alive until the root has
         been computed; otherwise release it right away.  */
      if (root_ptr == op_ptr)
        {
          old_limbs = root_ptr;
          old_alloc = root->_mp_alloc;
        }
      else
        (*__gmp_free_func) (root_ptr, root->_mp_alloc * BYTES_PER_MP_LIMB);

      root->_mp_alloc = root_size;
      root_ptr = (mp_ptr) (*__gmp_allocate_func) (root_size * BYTES_PER_MP_LIMB);
      root->_mp_d = root_ptr;
    }
  else if (root_ptr == op_ptr)
    {
      /* ROOT and OP share storage: work from a temporary copy of OP so
         that input and output do not overlap.  */
      op_ptr = (mp_ptr) TMP_ALLOC (op_size * BYTES_PER_MP_LIMB);
      MPN_COPY (op_ptr, root_ptr, op_size);
    }

  mpn_sqrtrem (root_ptr, NULL, op_ptr, op_size);

  root->_mp_size = root_size;

  if (old_limbs != NULL)
    (*__gmp_free_func) (old_limbs, old_alloc * BYTES_PER_MP_LIMB);

  TMP_FREE (marker);
}
/* Fill the scalar X with a random value by calling ecc_mod_random on
   the modulus x->ecc->q, drawing randomness from RANDOM / RANDOM_CTX.
   Scratch space is sized with ECC_MOD_RANDOM_ITCH for that modulus.  */
void
ecc_scalar_random (struct ecc_scalar *x,
                   void *random_ctx, nettle_random_func *random)
{
  TMP_DECL (scratch, mp_limb_t, ECC_MOD_RANDOM_ITCH (ECC_MAX_SIZE));
  TMP_ALLOC (scratch, ECC_MOD_RANDOM_ITCH (x->ecc->q.size));

  ecc_mod_random (&x->ecc->q, x->p, random_ctx, random, scratch);
}
unsigned long int mpz_fdiv_r_ui (mpz_ptr rem, mpz_srcptr dividend, unsigned long int divisor) { mp_size_t ns, nn; mp_ptr np; mp_limb_t rl; if (divisor == 0) DIVIDE_BY_ZERO; ns = SIZ(dividend); if (ns == 0) { SIZ(rem) = 0; return 0; } nn = ABS(ns); np = PTR(dividend); #if GMP_NAIL_BITS != 0 if (divisor > GMP_NUMB_MAX) { mp_limb_t dp[2]; mp_ptr rp, qp; mp_size_t rn; TMP_DECL (mark); MPZ_REALLOC (rem, 2); rp = PTR(rem); if (nn == 1) /* tdiv_qr requirements; tested above for 0 */ { rl = np[0]; rp[0] = rl; } else { TMP_MARK (mark); dp[0] = divisor & GMP_NUMB_MASK; dp[1] = divisor >> GMP_NUMB_BITS; qp = TMP_ALLOC_LIMBS (nn - 2 + 1); mpn_tdiv_qr (qp, rp, (mp_size_t) 0, np, nn, dp, (mp_size_t) 2); TMP_FREE (mark); rl = rp[0] + (rp[1] << GMP_NUMB_BITS); } if (rl != 0 && ns < 0) { rl = divisor - rl; rp[0] = rl & GMP_NUMB_MASK; rp[1] = rl >> GMP_NUMB_BITS; }
/* Compute t = a mod m, where a is {ap,an}, m is {mp,mn} and the result
   t is written to {tp,mn}.

   The quotient produced by mpn_tdiv_qr is not needed; it goes into
   temporary space that is released before returning.  */
static void
reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn)
{
  mp_ptr quotient;
  TMP_DECL (marker);

  TMP_MARK (marker);

  quotient = TMP_ALLOC_LIMBS (an - mn + 1);
  mpn_tdiv_qr (quotient, tp, 0L, ap, an, mp, mn);

  TMP_FREE (marker);
}
/* CFB-8 decryption.

   BUFFER holds 2 * block_size bytes: the current shift register (IV)
   in the lower half and up to one block of upcoming ciphertext in the
   upper half.  Because the ciphertext is known in advance, the cipher
   input for byte i of a block is simply buffer + i, so up to
   block_size cipher calls are made back to back and the results are
   XORed into DST with a single memxor3.

   On return IV holds the last block_size bytes of the register
   (old IV bytes followed by ciphertext), also when LENGTH is not a
   multiple of block_size.  */
void
cfb8_decrypt(const void *ctx, nettle_cipher_func *f,
             size_t block_size, uint8_t *iv,
             size_t length, uint8_t *dst,
             const uint8_t *src)
{
  TMP_DECL(buffer, uint8_t, NETTLE_MAX_CIPHER_BLOCK_SIZE * 2);
  TMP_DECL(outbuf, uint8_t, NETTLE_MAX_CIPHER_BLOCK_SIZE * 2);
  TMP_ALLOC(buffer, block_size * 2);
  TMP_ALLOC(outbuf, block_size * 2);
  uint8_t i = 0;

  memcpy(buffer, iv, block_size);
  memcpy(buffer + block_size, src,
         length < block_size ? length : block_size);

  while (length)
    {
      /* Each call writes a whole block at outbuf + i, but only its
         first byte survives: the next call overwrites the rest.  */
      for (i = 0; i < length && i < block_size; i++)
        f(ctx, block_size, outbuf + i, buffer + i);

      memxor3(dst, src, outbuf, i);

      length -= i;
      src += i;
      dst += i;

      /* Slide the register only after a complete block.  After a
         trailing partial block the register must stay in place:
         buffer + i is then exactly the updated IV (the remaining old
         IV bytes followed by the i ciphertext bytes just consumed).
         Sliding unconditionally would make the final copy below read
         stale or uninitialised bytes.  */
      if (i == block_size)
        {
          memcpy(buffer, buffer + block_size, block_size);
          memcpy(buffer + block_size, src,
                 length < block_size ? length : block_size);
        }
    }

  memcpy(iv, buffer + i, block_size);
}
void mpn_sqr_n (mp_ptr prodp, mp_srcptr up, mp_size_t un) { ASSERT (un >= 1); ASSERT (! MPN_OVERLAP_P (prodp, 2*un, up, un)); /* FIXME: Can this be removed? */ if (un == 0) return; if (BELOW_THRESHOLD (un, SQR_BASECASE_THRESHOLD)) { /* mul_basecase is faster than sqr_basecase on small sizes sometimes */ mpn_mul_basecase (prodp, up, un, up, un); } else if (BELOW_THRESHOLD (un, SQR_KARATSUBA_THRESHOLD)) { /* plain schoolbook multiplication */ mpn_sqr_basecase (prodp, up, un); } else if (BELOW_THRESHOLD (un, SQR_TOOM3_THRESHOLD)) { /* karatsuba multiplication */ mp_ptr tspace; TMP_DECL (marker); TMP_MARK (marker); tspace = TMP_ALLOC_LIMBS (MPN_KARA_SQR_N_TSIZE (un)); mpn_kara_sqr_n (prodp, up, un, tspace); TMP_FREE (marker); } #if WANT_FFT || TUNE_PROGRAM_BUILD else if (BELOW_THRESHOLD (un, SQR_FFT_THRESHOLD)) #else else #endif { /* Toom3 multiplication. Use workspace from the heap, as stack may be limited. Since n is at least MUL_TOOM3_THRESHOLD, the multiplication will take much longer than malloc()/free(). */ mp_ptr tspace; mp_size_t tsize; tsize = MPN_TOOM3_SQR_N_TSIZE (un); tspace = __GMP_ALLOCATE_FUNC_LIMBS (tsize); mpn_toom3_sqr_n (prodp, up, un, tspace); __GMP_FREE_FUNC_LIMBS (tspace, tsize); } #if WANT_FFT || TUNE_PROGRAM_BUILD else {
/* Sum the N numbers of the array TAB into RET with rounding mode RND
   and return the value given back by mpfr_sum.

   mpfr_sum is called with an array of pointers, so a temporary
   pointer table referring to the entries of TAB is built first.  */
static int
mpfr_list_sum (mpfr_ptr ret, mpfr_t *tab, unsigned long n, mp_rnd_t rnd)
{
  mpfr_ptr *ptrs;
  unsigned long k;
  int inexact;
  TMP_DECL(marker);

  TMP_MARK(marker);

  ptrs = (mpfr_ptr *) TMP_ALLOC(n * sizeof(mpfr_srcptr));
  k = 0;
  while (k < n)
    {
      ptrs[k] = tab[k];
      k++;
    }

  inexact = mpfr_sum (ret, ptrs, n, rnd);

  TMP_FREE(marker);
  return inexact;
}
/* Compute the inverse of X modulo N and store it in INVERSE.

   Returns 1 if an inverse exists and 0 otherwise, in which case
   INVERSE is left untouched.  The stored inverse is never negative:
   a negative cofactor from mpz_gcdext is shifted by N (added when N is
   positive, subtracted when N is negative).  */
int
mpz_invert (mpz_ptr inverse, mpz_srcptr x, mpz_srcptr n)
{
  mpz_t gcd, cofactor;
  mp_size_t xsize, nsize, size;
  int found;
  TMP_DECL (marker);

  xsize = ABS (SIZ (x));
  nsize = ABS (SIZ (n));
  size = MAX (xsize, nsize) + 1;

  /* No inverse exists if the leftside operand is 0.  Likewise, no
     inverse exists if the mod operand is 1.  */
  if (xsize == 0 || (nsize == 1 && (PTR (n))[0] == 1))
    return 0;

  TMP_MARK (marker);

  MPZ_TMP_INIT (gcd, size);
  MPZ_TMP_INIT (cofactor, size);
  mpz_gcdext (gcd, cofactor, (mpz_ptr) 0, x, n);

  /* An inverse exists exactly when the gcd is 1.  */
  found = (SIZ (gcd) == 1 && PTR(gcd)[0] == 1);
  if (found)
    {
      /* Make sure we return a positive inverse.  */
      if (SIZ (cofactor) >= 0)
        mpz_set (inverse, cofactor);
      else if (SIZ (n) < 0)
        mpz_sub (inverse, cofactor, n);
      else
        mpz_add (inverse, cofactor, n);
    }

  TMP_FREE (marker);
  return found;
}
/* Build the SIZE-byte encoded message for an RSA/SHA-256 signature from
   an already computed DIGEST (SHA256_DIGEST_SIZE bytes) and store it in
   M as an unsigned big-endian number.

   pkcs1_signature_prefix fills in everything in front of the digest;
   the digest itself occupies the last SHA256_DIGEST_SIZE bytes.
   Returns 1 on success, 0 if pkcs1_signature_prefix reports failure
   (in which case M is not modified).  */
int
pkcs1_rsa_sha256_encode_digest(mpz_t m, unsigned size, const uint8_t *digest)
{
  TMP_DECL(em, uint8_t, NETTLE_MAX_BIGNUM_BITS / 8);
  TMP_ALLOC(em, size);

  if (!pkcs1_signature_prefix(size, em,
                              sizeof(sha256_prefix),
                              sha256_prefix,
                              SHA256_DIGEST_SIZE))
    return 0;

  memcpy(em + size - SHA256_DIGEST_SIZE, digest, SHA256_DIGEST_SIZE);
  nettle_mpz_set_str_256_u(m, size, em);
  return 1;
}
/* Build the SIZE-byte encoded message for an RSA/SHA-256 signature,
   taking the digest from the hashing context HASH, and store it in M as
   an unsigned big-endian number.

   pkcs1_signature_prefix fills in everything in front of the digest;
   sha256_digest then writes the digest straight into the last
   SHA256_DIGEST_SIZE bytes.  Returns 1 on success, 0 if
   pkcs1_signature_prefix reports failure (HASH is then not touched).  */
int
pkcs1_rsa_sha256_encode(mpz_t m, unsigned size, struct sha256_ctx *hash)
{
  TMP_DECL(em, uint8_t, NETTLE_MAX_BIGNUM_BITS / 8);
  TMP_ALLOC(em, size);

  if (!pkcs1_signature_prefix(size, em,
                              sizeof(sha256_prefix),
                              sha256_prefix,
                              SHA256_DIGEST_SIZE))
    return 0;

  sha256_digest(hash, SHA256_DIGEST_SIZE, em + size - SHA256_DIGEST_SIZE);
  nettle_mpz_set_str_256_u(m, size, em);
  return 1;
}
/* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
   using naive method with O(l) multiplications.
   Return the number of iterations l.
   The absolute error on s is less than 3*l*(l+1)*2^(-q).
   Version using fixed-point arithmetic with mpz instead of mpfr for
   internal computations.
   s must have at least qn+1 limbs (qn should be enough, but currently fails
   since mpz_mul_2exp(s, s, q-1) reallocates qn+1 limbs).
   The exponent attached to s is stored in *exps.  */
static unsigned long
mpfr_exp2_aux (mpz_t s, mpfr_srcptr r, mp_prec_t q, mp_exp_t *exps)
{
  unsigned long l;
  mp_exp_t dif;
  mp_size_t qn;
  mpz_t t, rr;
  mp_exp_t expt, expr;
  TMP_DECL(marker);

  TMP_MARK(marker);

  qn = 1 + (q-1)/BITS_PER_MP_LIMB;
  MY_INIT_MPZ(t, 2*qn+1);
  MY_INIT_MPZ(rr, qn+1);

  /* t = 1 with exponent 0 */
  mpz_set_ui(t, 1);
  expt = 0;

  /* s = 2^(q-1) with exponent 1-q */
  mpz_set_ui(s, 1);
  mpz_mul_2exp(s, s, q-1);
  *exps = 1 - (mp_exp_t) q;

  expr = mpfr_get_z_exp(rr, r); /* no error here */

  for (l = 1; ; l++)
    {
      /* next term: t <- t * r / l */
      mpz_mul(t, t, rr);
      expt += expr;
      dif = *exps + mpz_sizeinbase(s, 2) - expt - mpz_sizeinbase(t, 2);
      /* truncates the bits of t which are < ulp(s) = 2^(1-q) */
      expt += mpz_normalize(t, t, (mp_exp_t) q-dif); /* error at most 2^(1-q) */
      mpz_div_ui(t, t, l); /* error at most 2^(1-q) */
      /* the error wrt t^l/l! is here at most 3*l*ulp(s) */
      MPFR_ASSERTD (expt == *exps);
      mpz_add(s, s, t); /* no error here: exact */
      /* ensures rr has the same size as t: after several shifts, the
         error on rr is still at most ulp(t)=ulp(s) */
      expr += mpz_normalize(rr, rr, mpz_sizeinbase(t, 2));

      /* stop once the term has been truncated to zero */
      if (mpz_cmp_ui(t, 0) == 0)
        break;
    }

  TMP_FREE(marker);
  return l;
}
/* Multiply {up,n} by LIMB, add the product to {rp,n} in place, and
   return the carry-out limb.

   GMPN_MULWW fills two N-limb arrays (presumably the low and high
   halves of each up[i] * limb product).  The first array is added to
   RP at the same limb positions; the second is added one limb higher,
   and its top limb goes straight into the returned carry.  */
mp_limb_t
mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t limb)
{
  mp_ptr prod_lo, prod_hi, sum;
  mp_limb_t carry;
  TMP_DECL (marker);

  TMP_MARK (marker);

  prod_hi = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
  prod_lo = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
  sum = TMP_ALLOC (n * BYTES_PER_MP_LIMB);

  GMPN_MULWW (prod_hi, prod_lo, up, &n, &limb);

  /* sum = rp + prod_lo; limb 0 of the result is already final.  */
  carry = mpn_add_n (sum, rp, prod_lo, n);
  rp[0] = sum[0];

  /* Remaining limbs: add prod_hi shifted up by one limb position.  */
  carry += mpn_add_n (rp + 1, sum + 1, prod_hi, n - 1);
  carry += prod_hi[n - 1];

  TMP_FREE (marker);
  return carry;
}
/* CFB encryption of LENGTH bytes from SRC to DST.

   For each complete block, the previous ciphertext block (the IV for
   the first one) is run through the cipher F and XORed with the
   plaintext.  When operating in place (src == dst) the cipher output
   goes through a temporary buffer so the plaintext is not overwritten
   before it is used.  After the complete blocks, IV is updated to the
   last ciphertext block.  A trailing partial block is encrypted with
   that IV, and IV is not updated for it.  */
void
cfb_encrypt(const void *ctx, nettle_cipher_func *f,
            size_t block_size, uint8_t *iv,
            size_t length, uint8_t *dst,
            const uint8_t *src)
{
  uint8_t *prev = iv;           /* input of the next cipher call */
  int in_place = (src == dst);
  TMP_DECL(buffer, uint8_t, NETTLE_MAX_CIPHER_BLOCK_SIZE);
  TMP_ALLOC(buffer, block_size);

  while (length >= block_size)
    {
      if (in_place)
        {
          f(ctx, block_size, buffer, prev);
          memxor(dst, buffer, block_size);
        }
      else
        {
          f(ctx, block_size, dst, prev);
          memxor(dst, src, block_size);
        }

      /* The block just written becomes the next cipher input. */
      prev = dst;
      dst += block_size;
      src += block_size;
      length -= block_size;
    }

  if (prev != iv)
    memcpy(iv, prev, block_size);

  if (length)
    {
      f(ctx, block_size, buffer, iv);
      memxor3(dst, buffer, src, length);
      /* We do not care about updating IV here. This is the last call in
       * message sequence and one has to set IV afterwards anyway */
    }
}
/* s <- 1 + r/1! + r^2/2! + ... + r^l/l! while MPFR_EXP(r^l/l!)+MPFR_EXPR(r)>-q
   using naive method with O(l) multiplications.
   Return the number of iterations l.
   The absolute error on s is less than 3*l*(l+1)*2^(-q).
   Version using fixed-point arithmetic with mpz instead of mpfr for
   internal computations.
   s must have at least qn+1 limbs (qn should be enough, but currently fails
   since mpz_mul_2exp(s, s, q-1) reallocates qn+1 limbs).
   The exponent attached to s is stored in *exps.  */
static int
mpfr_exp2_aux (mpz_t s, mpfr_srcptr r, int q, int *exps)
{
  int l, dif, qn;
  mpz_t t, rr;
  mp_exp_t expt, expr;
  TMP_DECL(marker);

  TMP_MARK(marker);
  qn = 1 + (q-1)/BITS_PER_MP_LIMB;
  MY_INIT_MPZ(t, 2*qn+1); /* 2*qn+1 is needed since mpz_div_2exp may
                             need an extra limb */
  MY_INIT_MPZ(rr, qn+1);
  mpz_set_ui(t, 1);
  expt = 0;
  mpz_set_ui(s, 1);
  mpz_mul_2exp(s, s, q-1);
  *exps = 1-q;                  /* s = 2^(q-1) */
  expr = mpfr_get_z_exp(rr, r); /* no error here */

  l = 0;
  do
    {
      l++;
      mpz_mul(t, t, rr);
      expt = expt+expr;
      dif = *exps + mpz_sizeinbase(s, 2) - expt - mpz_sizeinbase(t, 2);
      /* truncates the bits of t which are < ulp(s) = 2^(1-q) */
      expt += mpz_normalize(t, t, q-dif); /* error at most 2^(1-q) */
      mpz_div_ui(t, t, l); /* error at most 2^(1-q) */
      /* the error wrt t^l/l! is here at most 3*l*ulp(s) */
#ifdef DEBUG
      if (expt != *exps)
        {
          /* expt is an mp_exp_t, which need not be an int: convert it
             explicitly so that the argument matches the conversion
             specifier.  */
          fprintf(stderr, "Error: expt != exps %ld %d\n", (long) expt, *exps);
          exit(1);
        }
#endif
      mpz_add(s, s, t); /* no error here: exact */
      /* ensures rr has the same size as t: after several shifts, the
         error on rr is still at most ulp(t)=ulp(s) */
      expr += mpz_normalize(rr, rr, mpz_sizeinbase(t, 2));
    }
  while (mpz_cmp_ui(t, 0));

  TMP_FREE(marker);
  return l;
}
/* Decode a PKCS#1 type 2 (encryption) block.

   M is converted to KEY_SIZE big-endian bytes, which must have the form

     0x00 0x02 <padding: at least 8 bytes, none of them zero> 0x00 <message>

   On entry *LENGTH is the capacity of MESSAGE.  On success the message
   is copied to MESSAGE, *LENGTH is set to its length and 1 is
   returned.  On a malformed block, or if the message does not fit,
   0 is returned and neither MESSAGE nor *LENGTH is modified.  */
int
pkcs1_decrypt (unsigned key_size,
               const mpz_t m,
               unsigned *length, uint8_t *message)
{
  TMP_DECL(em, uint8_t, NETTLE_MAX_BIGNUM_SIZE);
  uint8_t *terminator;
  unsigned padding;
  unsigned message_length;

  /* Smallest well-formed block: two header bytes, 8 padding bytes and
     the zero terminator.  Rejecting shorter sizes up front also keeps
     key_size - 2 below from wrapping around and em[1] from being read
     out of bounds.  */
  if (key_size < 2 + 8 + 1)
    return 0;

  TMP_ALLOC(em, key_size);
  nettle_mpz_get_str_256(key_size, em, m);

  /* Check format */
  if (em[0] || em[1] != 2)
    return 0;

  /* The first zero byte after the header ends the padding. */
  terminator = memchr(em + 2, 0, key_size - 2);
  if (!terminator)
    return 0;

  padding = terminator - (em + 2);
  if (padding < 8)
    return 0;

  /* Everything after the two header bytes, the padding and the
     terminator is the message.  */
  message_length = key_size - 3 - padding;

  if (*length < message_length)
    return 0;

  memcpy(message, terminator + 1, message_length);
  *length = message_length;

  return 1;
}
/* For now, also disable REDC when MOD is even, as the inverse can't handle that. At some point, we might want to make the code faster for that case, perhaps using CRR. */ #ifndef POWM_THRESHOLD #define POWM_THRESHOLD ((8 * SQR_KARATSUBA_THRESHOLD) / 3) #endif #define HANDLE_NEGATIVE_EXPONENT 1 #undef REDUCE_EXPONENT void #ifndef BERKELEY_MP mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m) #else /* BERKELEY_MP */ pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r) #endif /* BERKELEY_MP */ { mp_ptr xp, tp, qp, gp, this_gp; mp_srcptr bp, ep, mp; mp_size_t bn, es, en, mn, xn; mp_limb_t invm, c; unsigned long int enb; mp_size_t i, K, j, l, k; int m_zero_cnt, e_zero_cnt; int sh; int use_redc; #if HANDLE_NEGATIVE_EXPONENT mpz_t new_b; #endif #if REDUCE_EXPONENT mpz_t new_e; #endif TMP_DECL (marker); mp = PTR(m); mn = ABSIZ (m); if (mn == 0) DIVIDE_BY_ZERO; TMP_MARK (marker); es = SIZ (e); if (es <= 0) { if (es == 0) { /* Exponent is zero, result is 1 mod m, i.e., 1 or 0 depending on if m equals 1. */ SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1; PTR(r)[0] = 1; TMP_FREE (marker); /* we haven't really allocated anything here */ return; } #if HANDLE_NEGATIVE_EXPONENT MPZ_TMP_INIT (new_b, mn + 1); if (! mpz_invert (new_b, b, m)) DIVIDE_BY_ZERO; b = new_b; es = -es; #else DIVIDE_BY_ZERO; #endif } en = es; #if REDUCE_EXPONENT /* Reduce exponent by dividing it by phi(m) when m small. */ if (mn == 1 && mp[0] < 0x7fffffffL && en * GMP_NUMB_BITS > 150) { MPZ_TMP_INIT (new_e, 2); mpz_mod_ui (new_e, e, phi (mp[0])); e = new_e; } #endif use_redc = mn < POWM_THRESHOLD && mp[0] % 2 != 0; if (use_redc) { /* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */ modlimb_invert (invm, mp[0]); invm = -invm; } else { /* Normalize m (i.e. make its most significant bit set) as required by division functions below. 
*/ count_leading_zeros (m_zero_cnt, mp[mn - 1]); m_zero_cnt -= GMP_NAIL_BITS; if (m_zero_cnt != 0) { mp_ptr new_mp; new_mp = TMP_ALLOC_LIMBS (mn); mpn_lshift (new_mp, mp, mn, m_zero_cnt); mp = new_mp; } } /* Determine optimal value of k, the number of exponent bits we look at at a time. */ count_leading_zeros (e_zero_cnt, PTR(e)[en - 1]); e_zero_cnt -= GMP_NAIL_BITS; enb = en * GMP_NUMB_BITS - e_zero_cnt; /* number of bits of exponent */ k = 1; K = 2; while (2 * enb > K * (2 + k * (3 + k))) { k++; K *= 2; } tp = TMP_ALLOC_LIMBS (2 * mn + 1); qp = TMP_ALLOC_LIMBS (mn + 1); gp = __GMP_ALLOCATE_FUNC_LIMBS (K / 2 * mn); /* Compute x*R^n where R=2^BITS_PER_MP_LIMB. */ bn = ABSIZ (b); bp = PTR(b); /* Handle |b| >= m by computing b mod m. FIXME: It is not strictly necessary for speed or correctness to do this when b and m have the same number of limbs, perhaps remove mpn_cmp call. */ if (bn > mn || (bn == mn && mpn_cmp (bp, mp, mn) >= 0)) { /* Reduce possibly huge base while moving it to gp[0]. Use a function call to reduce, since we don't want the quotient allocation to live until function return. */ if (use_redc) { reduce (tp + mn, bp, bn, mp, mn); /* b mod m */ MPN_ZERO (tp, mn); mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); /* unnormnalized! */ } else { reduce (gp, bp, bn, mp, mn); } } else { /* |b| < m. We pad out operands to become mn limbs, which simplifies the rest of the function, but slows things down when the |b| << m. */ if (use_redc) { MPN_ZERO (tp, mn); MPN_COPY (tp + mn, bp, bn); MPN_ZERO (tp + mn + bn, mn - bn); mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn); } else { MPN_COPY (gp, bp, bn); MPN_ZERO (gp + bn, mn - bn); } } /* Compute xx^i for odd g < 2^i. 
*/ xp = TMP_ALLOC_LIMBS (mn); mpn_sqr_n (tp, gp, mn); if (use_redc) redc (xp, mp, mn, invm, tp); /* xx = x^2*R^n */ else mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn); this_gp = gp; for (i = 1; i < K / 2; i++) { mpn_mul_n (tp, this_gp, xp, mn); this_gp += mn; if (use_redc) redc (this_gp, mp, mn, invm, tp); /* g[i] = x^(2i+1)*R^n */ else mpn_tdiv_qr (qp, this_gp, 0L, tp, 2 * mn, mp, mn); } /* Start the real stuff. */ ep = PTR (e); i = en - 1; /* current index */ c = ep[i]; /* current limb */ sh = GMP_NUMB_BITS - e_zero_cnt; /* significant bits in ep[i] */ sh -= k; /* index of lower bit of ep[i] to take into account */ if (sh < 0) { /* k-sh extra bits are needed */ if (i > 0) { i--; c <<= (-sh); sh += GMP_NUMB_BITS; c |= ep[i] >> sh; } }
/* CBC decryption of LENGTH bytes (a multiple of block_size) from SRC to
   DST.  On return IV holds the last ciphertext block.

   With separate buffers, everything is decrypted in one ECB call and
   then XORed with the ciphertext shifted by one block.  In place, the
   data is decrypted into a temporary buffer in chunks of at most
   CBC_BUFFER_LIMIT bytes.  */
void
cbc_decrypt(void *ctx, nettle_crypt_func *f,
            unsigned block_size, uint8_t *iv,
            unsigned length, uint8_t *dst,
            const uint8_t *src)
{
  assert(!(length % block_size));

  if (!length)
    return;

  if (src != dst)
    {
      /* Decrypt in ECB mode */
      f(ctx, length, dst, src);

      /* XOR the cryptotext, shifted one block */
      memxor(dst, iv, block_size);
      memxor(dst + block_size, src, length - block_size);
      memcpy(iv, src + length - block_size, block_size);
    }
  else
    {
      /* NOTE: We assume that block_size <= CBC_BUFFER_LIMIT, and we
         depend on memxor3 working from the end of the area, allowing
         certain overlapping operands. */

      TMP_DECL(buffer, uint8_t, CBC_BUFFER_LIMIT);
      TMP_DECL(initial_iv, uint8_t, NETTLE_MAX_CIPHER_BLOCK_SIZE);

      /* Chunk size: everything at once if it fits, otherwise the
         largest whole number of blocks within the limit.  */
      unsigned buffer_size
        = (length <= CBC_BUFFER_LIMIT)
          ? length
          : CBC_BUFFER_LIMIT - (CBC_BUFFER_LIMIT % block_size);

      TMP_ALLOC(buffer, buffer_size);
      TMP_ALLOC(initial_iv, block_size);

      /* Full chunks; the final chunk is handled after the loop. */
      while (length > buffer_size)
        {
          f(ctx, buffer_size, buffer, src);
          /* Save the IV for this chunk, then pick up the next IV from
             the ciphertext before it gets overwritten.  */
          memcpy(initial_iv, iv, block_size);
          memcpy(iv, src + buffer_size - block_size, block_size);
          memxor3(dst + block_size, buffer + block_size, src,
                  buffer_size - block_size);
          memxor3(dst, buffer, initial_iv, block_size);

          length -= buffer_size;
          src += buffer_size;
          dst += buffer_size;
        }

      f(ctx, length, buffer, src);
      memcpy(initial_iv, iv, block_size);
      /* Copies last block */
      memcpy(iv, src + length - block_size, block_size);
      /* Writes all but first block, reads all but last block. */
      memxor3(dst + block_size, buffer + block_size, src,
              length - block_size);
      /* Writes first block. */
      memxor3(dst, buffer, initial_iv, block_size);
    }
}
/* Set R to U + V.

   Zero operands and operands of opposite sign are dispatched early (the
   latter to mpf_sub with V negated).  Otherwise the operand with the
   larger exponent is made U, both operands are cut down to what fits in
   R's precision, and the limbs are combined in temporary space
   according to how the two limb ranges overlap.  */
void
mpf_add (mpf_ptr r, mpf_srcptr u, mpf_srcptr v)
{
  mp_srcptr up, vp;
  mp_ptr rp, tp;
  mp_size_t usize, vsize, rsize;
  mp_size_t prec;
  mp_exp_t uexp;
  mp_size_t ediff;
  mp_limb_t cy;
  int negate;
  TMP_DECL (marker);

  usize = u->_mp_size;
  vsize = v->_mp_size;

  /* Handle special cases that don't work in generic code below.  */
  if (usize == 0)
    {
      if (r != v)
        mpf_set (r, v);
      return;
    }
  if (vsize == 0)
    {
      if (r != u)
        mpf_set (r, u);
      return;
    }

  /* If signs of U and V are different, perform subtraction.  */
  if ((usize ^ vsize) < 0)
    {
      __mpf_struct v_negated;
      v_negated._mp_size = -vsize;
      v_negated._mp_exp = v->_mp_exp;
      v_negated._mp_d = v->_mp_d;
      mpf_sub (r, u, &v_negated);
      return;
    }

  TMP_MARK (marker);

  /* Signs are now known to be the same.  */
  negate = usize < 0;

  /* Make U be the operand with the largest exponent.  */
  if (u->_mp_exp < v->_mp_exp)
    {
      mpf_srcptr swap = u;
      u = v;
      v = swap;
      usize = u->_mp_size;
      vsize = v->_mp_size;
    }

  usize = ABS (usize);
  vsize = ABS (vsize);
  up = u->_mp_d;
  vp = v->_mp_d;
  rp = r->_mp_d;
  prec = r->_mp_prec;
  uexp = u->_mp_exp;
  ediff = u->_mp_exp - v->_mp_exp;

  /* If U extends beyond PREC, ignore the part that does.  */
  if (usize > prec)
    {
      up += usize - prec;
      usize = prec;
    }

  /* If V extends beyond PREC, ignore the part that does.
     Note that this may make vsize negative.  */
  if (vsize + ediff > prec)
    {
      vp += vsize + ediff - prec;
      vsize = prec - ediff;
    }

  /* Allocate temp space for the result.  Allocate
     just vsize + ediff later???  */
  tp = (mp_ptr) TMP_ALLOC (prec * BYTES_PER_MP_LIMB);

  if (ediff >= prec)
    {
      /* V completely cancelled.  */
      if (rp != up)
        MPN_COPY_INCR (rp, up, usize);
      rsize = usize;
    }
  else
    {
      /* uuuu     |  uuuu     |  uuuu     |  uuuu     |  uuuu    */
      /* vvvvvvv  |  vv       |    vvvvv  |    v      |       vv */

      if (usize > ediff && vsize + ediff <= usize)
        {
          /* U and V overlap and V ends inside U:
               uuuu
                 v
             The low limbs of U below V are copied unchanged.  */
          mp_size_t low = usize - ediff - vsize;
          MPN_COPY (tp, up, low);
          cy = mpn_add (tp + low, up + low, usize - low, vp, vsize);
          rsize = usize;
        }
      else if (usize > ediff)
        {
          /* U and V overlap and V extends below U:
               uuuu
                 vvvvv
             The low limbs of V below U are copied unchanged.  */
          mp_size_t low = vsize + ediff - usize;
          MPN_COPY (tp, vp, low);
          cy = mpn_add (tp + low, up, usize, vp + low, usize - ediff);
          rsize = vsize + ediff;
        }
      else
        {
          /* No overlap:
               uuuu
                     vv
             V, a gap of zero limbs, then U.  */
          mp_size_t low = vsize + ediff - usize;
          MPN_COPY (tp, vp, vsize);
          MPN_ZERO (tp + vsize, ediff - usize);
          MPN_COPY (tp + low, up, usize);
          cy = 0;
          rsize = low + usize;
        }

      /* Move the result into place and append the carry limb.  */
      MPN_COPY (rp, tp, rsize);
      rp[rsize] = cy;
      rsize += cy;
      uexp += cy;
    }

  r->_mp_size = negate ? -rsize : rsize;
  r->_mp_exp = uexp;
  TMP_FREE (marker);
}
/* CFB decryption of LENGTH bytes from SRC to DST.

   The cipher input for each block is the previous ciphertext block
   (the IV for the first one), all of which are known up front, so
   whole runs of blocks go through F in one call.  After the complete
   blocks, IV is updated to the last complete ciphertext block.  A
   trailing partial block is decrypted with that IV, and IV is not
   updated for it.  */
void
cfb_decrypt(const void *ctx, nettle_cipher_func *f,
            size_t block_size, uint8_t *iv,
            size_t length, uint8_t *dst,
            const uint8_t *src)
{
  if (src != dst)
    {
      size_t left = length % block_size;

      length -= left;
      if (length > 0)
        {
          /* Decrypt in ECB mode: the key stream for block 0 comes from
             the IV, the one for block k > 0 from ciphertext block k-1. */
          f(ctx, block_size, dst, iv);
          f(ctx, length - block_size, dst + block_size, src);
          memcpy(iv, src + length - block_size, block_size);
          memxor(dst, src, length);
        }

      if (left > 0)
        {
          TMP_DECL(buffer, uint8_t, NETTLE_MAX_CIPHER_BLOCK_SIZE);
          TMP_ALLOC(buffer, block_size);

          f(ctx, block_size, buffer, iv);
          memxor3(dst + length, src + length, buffer, left);
        }
    }
  else
    {
      /* For in-place CFB, we decrypt into a temporary buffer of size
       * at most CFB_BUFFER_LIMIT, and process that amount of data at
       * a time. */

      /* NOTE: We assume that block_size <= CFB_BUFFER_LIMIT */

      TMP_DECL(buffer, uint8_t, CFB_BUFFER_LIMIT);

      size_t buffer_size;
      size_t left;

      /* Largest whole number of blocks that fits within the limit. */
      buffer_size = CFB_BUFFER_LIMIT - (CFB_BUFFER_LIMIT % block_size);

      TMP_ALLOC(buffer, buffer_size);

      left = length % block_size;
      length -= left;

      while (length > 0)
        {
          size_t part = length > buffer_size ? buffer_size : length;

          /* length is greater than zero and is a multiple of
           * block_size, so it is not less than block_size.  The same
           * holds for part. */

          f(ctx, block_size, buffer, iv);
          f(ctx, part - block_size, buffer + block_size, src);
          /* Save the next IV before the ciphertext is overwritten. */
          memcpy(iv, src + part - block_size, block_size);
          memxor(dst, buffer, part);

          length -= part;
          src += part;
          dst += part;
        }

      if (left > 0)
        {
          f(ctx, block_size, buffer, iv);
          memxor(dst, buffer, left);
        }
    }
}
/* One step of the linear congruential generator held in RSTATE:

     seed <- (a * seed + c) mod 2^m2exp

   The new seed is stored back into RSTATE, and its upper part (with
   the low m2exp/2 bits discarded) is written to RP.  Returns the
   number of valid bits in the result.  */
static unsigned long int
lc (mp_ptr rp, gmp_randstate_t rstate)
{
  mp_ptr tp, seedp, ap;
  mp_size_t ta;
  mp_size_t tn, seedn, an;
  unsigned long int m2exp;
  unsigned long int drop_bits;
  mp_size_t drop_limbs;
  mp_limb_t c;
  TMP_DECL (mark);

  m2exp = rstate->_mp_algdata._mp_lc->_mp_m2exp;

  /* The code below assumes the mod part is a power of two.  Make sure
     that is the case.  */
  ASSERT_ALWAYS (m2exp != 0);

  c = (mp_limb_t) rstate->_mp_algdata._mp_lc->_mp_c;

  seedp = PTR (rstate->_mp_seed);
  seedn = SIZ (rstate->_mp_seed);

  if (seedn == 0)
    {
      /* Seed is 0.  Result is C % M.  Assume table is sensibly stored,
         with C smaller than M.  */
      *rp = c;

      *seedp = c;
      SIZ (rstate->_mp_seed) = 1;
      return m2exp;
    }

  ap = PTR (rstate->_mp_algdata._mp_lc->_mp_a);
  an = SIZ (rstate->_mp_algdata._mp_lc->_mp_a);

  /* Allocate temporary storage.  Let there be room for calculation of
     (A * seed + C) % M, or M if bigger than that.  */

  TMP_MARK (mark);

  ta = an + seedn + 1;
  tp = (mp_ptr) TMP_ALLOC (ta * BYTES_PER_MP_LIMB);

  /* t = a * seed; mpn_mul is given the longer operand first.  */
  if (seedn < an)
    mpn_mul (tp, ap, an, seedp, seedn);
  else
    mpn_mul (tp, seedp, seedn, ap, an);
  tn = an + seedn;

  /* t = t + c */
  tp[tn] = 0;                   /* sentinel, stops MPN_INCR_U */
  MPN_INCR_U (tp, tn, c);

  ASSERT_ALWAYS (m2exp / GMP_NUMB_BITS < ta);

  /* t = t % m: mask the limb holding bit m2exp; the limbs above it are
     cut off by the new size.  */
  tp[m2exp / GMP_NUMB_BITS] &= ((mp_limb_t) 1 << m2exp % GMP_NUMB_BITS) - 1;
  tn = (m2exp + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;

  /* Save result as next seed.  */
  MPN_COPY (PTR (rstate->_mp_seed), tp, tn);
  SIZ (rstate->_mp_seed) = tn;

  /* Discard the lower m2exp/2 bits of result.  */
  drop_bits = m2exp / 2;
  drop_limbs = drop_bits / GMP_NUMB_BITS;

  tn -= drop_limbs;
  if (tn > 0)
    {
      unsigned int cnt = drop_bits % GMP_NUMB_BITS;

      if (cnt == 0)
        {
          /* Even limb boundary.  */
          MPN_COPY_INCR (rp, tp + drop_limbs, tn);
        }
      else
        {
          mpn_rshift (tp, tp + drop_limbs, tn, cnt);
          MPN_COPY_INCR (rp, tp, drop_limbs + 1);
        }
    }

  TMP_FREE (mark);

  /* Return number of valid bits in the result.  */
  return (m2exp + 1) / 2;
}
int mpfr_div (mpfr_ptr q, mpfr_srcptr u, mpfr_srcptr v, mp_rnd_t rnd_mode) { mp_srcptr up, vp, bp; mp_size_t usize, vsize; mp_ptr ap, qp, rp; mp_size_t asize, bsize, qsize, rsize; mp_exp_t qexp; mp_size_t err, k; mp_limb_t tonearest; int inex, sh, can_round = 0, sign_quotient; unsigned int cc = 0, rw; TMP_DECL (marker); /************************************************************************** * * * This part of the code deals with special cases * * * **************************************************************************/ if (MPFR_ARE_SINGULAR(u,v)) { if (MPFR_IS_NAN(u) || MPFR_IS_NAN(v)) { MPFR_SET_NAN(q); MPFR_RET_NAN; } sign_quotient = MPFR_MULT_SIGN( MPFR_SIGN(u) , MPFR_SIGN(v) ); MPFR_SET_SIGN(q, sign_quotient); if (MPFR_IS_INF(u)) { if (MPFR_IS_INF(v)) { MPFR_SET_NAN(q); MPFR_RET_NAN; } else { MPFR_SET_INF(q); MPFR_RET(0); } } else if (MPFR_IS_INF(v)) { MPFR_SET_ZERO(q); MPFR_RET(0); } else if (MPFR_IS_ZERO(v)) { if (MPFR_IS_ZERO(u)) { MPFR_SET_NAN(q); MPFR_RET_NAN; } else { MPFR_SET_INF(q); MPFR_RET(0); } } else { MPFR_ASSERTD(MPFR_IS_ZERO(u)); MPFR_SET_ZERO(q); MPFR_RET(0); } } MPFR_CLEAR_FLAGS(q); /************************************************************************** * * * End of the part concerning special values. * * * **************************************************************************/ sign_quotient = MPFR_MULT_SIGN( MPFR_SIGN(u) , MPFR_SIGN(v) ); up = MPFR_MANT(u); vp = MPFR_MANT(v); MPFR_SET_SIGN(q, sign_quotient); TMP_MARK (marker); usize = MPFR_LIMB_SIZE(u); vsize = MPFR_LIMB_SIZE(v); /************************************************************************** * * * First try to use only part of u, v. If this is not sufficient, * * use the full u and v, to avoid long computations eg. in the case * * u = v. * * * **************************************************************************/ /* The dividend is a, length asize. The divisor is b, length bsize. 
*/ qsize = (MPFR_PREC(q) + 3) / BITS_PER_MP_LIMB + 1; /* in case PREC(q)=PREC(v), then vsize=qsize with probability 1-4/b where b is the number of bits per limb */ if (MPFR_LIKELY(vsize <= qsize)) { bsize = vsize; bp = vp; } else /* qsize < vsize: take only the qsize high limbs of the divisor */ { bsize = qsize; bp = (mp_srcptr) vp + (vsize - qsize); } /* we have {bp, bsize} * (1 + errb) = (true divisor) with 0 <= errb < 2^(-qsize*BITS_PER_MP_LIMB+1) */ asize = bsize + qsize; ap = (mp_ptr) TMP_ALLOC (asize * BYTES_PER_MP_LIMB); /* if all arguments have same precision, then asize will be about 2*usize */ if (MPFR_LIKELY(asize > usize)) { /* copy u into the high limbs of {ap, asize}, and pad with zeroes */ /* FIXME: could we copy only the qsize high limbs of the dividend? */ MPN_COPY (ap + asize - usize, up, usize); MPN_ZERO (ap, asize - usize); } else /* truncate the high asize limbs of u into {ap, asize} */ MPN_COPY (ap, up + usize - asize, asize); /* we have {ap, asize} = (true dividend) * (1 - erra) with 0 <= erra < 2^(-asize*BITS_PER_MP_LIMB). This {ap, asize} / {bp, bsize} = (true dividend) / (true divisor) * (1 - erra) (1 + errb) */ /* Allocate limbs for quotient and remainder. */ qp = (mp_ptr) TMP_ALLOC ((qsize + 1) * BYTES_PER_MP_LIMB); rp = (mp_ptr) TMP_ALLOC (bsize * BYTES_PER_MP_LIMB); rsize = bsize; mpn_tdiv_qr (qp, rp, 0, ap, asize, bp, bsize); sh = - (int) qp[qsize]; /* since u and v are normalized, sh is 0 or -1 */ /* we have {qp, qsize + 1} = {ap, asize} / {bp, bsize} (1 - errq) with 0 <= errq < 2^(-qsize*BITS_PER_MP_LIMB+1+sh) thus {qp, qsize + 1} = (true dividend) / (true divisor) * (1 - erra) (1 + errb) (1 - errq). In fact, since the truncated dividend and {rp, bsize} do not overlap, we have: {qp, qsize + 1} = (true dividend) / (true divisor) * (1 - erra') (1 + errb) where 0 <= erra' < 2^(-qsize*BITS_PER_MP_LIMB+sh) */ /* Estimate number of correct bits. 
*/ err = qsize * BITS_PER_MP_LIMB; /* We want to check if rounding is possible, but without normalizing because we might have to divide again if rounding is impossible, or if the result might be exact. We have however to mimic normalization */ /* To detect asap if the result is inexact, so as to avoid doing the division completely, we perform the following check : - if rnd_mode != GMP_RNDN, and the result is exact, we are unable to round simultaneously to zero and to infinity ; - if rnd_mode == GMP_RNDN, and if we can round to zero with one extra bit of precision, we can decide rounding. Hence in that case, check as in the case of GMP_RNDN, with one extra bit. Note that in the case of close to even rounding we shall do the division completely, but this is necessary anyway : we need to know whether this is really even rounding or not. */ if (MPFR_UNLIKELY(asize < usize || bsize < vsize)) { { mp_rnd_t rnd_mode1, rnd_mode2; mp_exp_t tmp_exp; mp_prec_t tmp_prec; if (bsize < vsize) err -= 2; /* divisor is truncated */ #if 0 /* commented this out since the truncation of the dividend is already taken into account in {rp, bsize}, which does not overlap with the neglected part of the dividend */ else if (asize < usize) err --; /* dividend is truncated */ #endif if (MPFR_LIKELY(rnd_mode == GMP_RNDN)) { rnd_mode1 = GMP_RNDZ; rnd_mode2 = MPFR_IS_POS_SIGN(sign_quotient) ? GMP_RNDU : GMP_RNDD; sh++; } else { rnd_mode1 = rnd_mode; switch (rnd_mode) { case GMP_RNDU: rnd_mode2 = GMP_RNDD; break; case GMP_RNDD: rnd_mode2 = GMP_RNDU; break; default: rnd_mode2 = MPFR_IS_POS_SIGN(sign_quotient) ? GMP_RNDU : GMP_RNDD; break; } } tmp_exp = err + sh + BITS_PER_MP_LIMB; tmp_prec = MPFR_PREC(q) + sh + BITS_PER_MP_LIMB; can_round = mpfr_can_round_raw (qp, qsize + 1, sign_quotient, tmp_exp, GMP_RNDN, rnd_mode1, tmp_prec) & mpfr_can_round_raw (qp, qsize + 1, sign_quotient, tmp_exp, GMP_RNDN, rnd_mode2, tmp_prec); /* restore original value of sh, i.e. 
sh = - qp[qsize] */ sh -= (rnd_mode == GMP_RNDN); }
/* s <- 1 + r/1! + r^2/2! + ... + r^l/l!, summing terms while
   EXP(r^l/l!) + EXP(r) > -q, using the Brent/Kung method with O(sqrt(l))
   multiplications.  Returns l, the number of terms accumulated.

   Uses m multiplications of full size and 2l/m of decreasing size, i.e. a
   total equivalent to about m+l/m full multiplications, i.e. 2*sqrt(l) for
   m=sqrt(l).

   Version using mpz.  The sum is kept as a scaled integer: on return the
   value represented is s * 2^(*exps), and *exps is set to 1-q here.
   r must have a negative exponent (asserted below).
   The original note says "ss must have at least (sizer+1) limbs"; this
   presumably refers to the caller's storage for s -- confirm at call site.
   The error is bounded by (l^2+4*l) ulps where l is the return value. */
static unsigned long
mpfr_exp2_aux2 (mpz_t s, mpfr_srcptr r, mp_prec_t q, mp_exp_t *exps)
{
  mp_exp_t expr, *expR, expt;
  mp_size_t sizer;
  mp_prec_t ql;
  unsigned long l, m, i;
  mpz_t t, *R, rr, tmp;
  TMP_DECL(marker);

  /* estimate value of l */
  MPFR_ASSERTD (MPFR_GET_EXP (r) < 0);
  l = q / (- MPFR_GET_EXP (r));
  m = __gmpfr_isqrt (l);
  /* we access R[2], thus we need m >= 2 */
  if (m < 2) m = 2;
  TMP_MARK(marker);
  R = (mpz_t*) TMP_ALLOC((m+1)*sizeof(mpz_t)); /* R[i] is r^i */
  expR = (mp_exp_t*) TMP_ALLOC((m+1)*sizeof(mp_exp_t)); /* exponent for R[i] */
  sizer = 1 + (MPFR_PREC(r)-1)/BITS_PER_MP_LIMB;
  /* tmp is the only heap-initialised mpz here (cleared before returning);
     rr, t and R[] are set up with MY_INIT_MPZ after TMP_MARK. */
  mpz_init(tmp);
  MY_INIT_MPZ(rr, sizer+2);
  MY_INIT_MPZ(t, 2*sizer); /* double size for products */
  mpz_set_ui(s, 0);
  *exps = 1-q; /* 1 ulp = 2^(1-q) */
  for (i = 0 ; i <= m ; i++) MY_INIT_MPZ(R[i], sizer+2);
  /* Baby steps: tabulate R[0..m] = r^0 .. r^m, all with exponent 1-q. */
  expR[1] = mpfr_get_z_exp(R[1], r); /* exact operation: no error */
  expR[1] = mpz_normalize2(R[1], R[1], expR[1], 1-q); /* error <= 1 ulp */
  mpz_mul(t, R[1], R[1]); /* err(t) <= 2 ulps */
  mpz_div_2exp(R[2], t, q-1); /* err(R[2]) <= 3 ulps */
  expR[2] = 1-q;
  for (i = 3 ; i <= m ; i++)
    {
      mpz_mul(t, R[i-1], R[1]); /* err(t) <= 2*i-2 */
      mpz_div_2exp(R[i], t, q-1); /* err(R[i]) <= 2*i-1 ulps */
      expR[i] = 1-q;
    }
  mpz_set_ui (R[0], 1);
  mpz_mul_2exp (R[0], R[0], q-1);
  expR[0] = 1-q; /* R[0]=1 */
  mpz_set_ui (rr, 1);
  expr = 0; /* rr contains r^l/l! */
  /* by induction: err(rr) <= 2*l ulps */

  l = 0;
  ql = q; /* precision used for current giant step */
  /* Giant steps: each pass adds the next m terms of the series to s. */
  do
    {
      /* all R[i] must have exponent 1-ql */
      if (l != 0)
        for (i = 0 ; i < m ; i++)
          expR[i] = mpz_normalize2 (R[i], R[i], expR[i], 1-ql);
      /* the absolute error on R[i]*rr is still 2*i-1 ulps */
      expt = mpz_normalize2 (t, R[m-1], expR[m-1], 1-ql);
      /* err(t) <= 2*m-1 ulps */
      /* computes t = 1 + r/(l+1) + ... + r^(m-1)*l!/(l+m-1)!
         using Horner's scheme */
      for (i = m-1 ; i-- != 0 ; )
        {
          mpz_div_ui(t, t, l+i+1); /* err(t) += 1 ulp */
          mpz_add(t, t, R[i]);
        }
      /* now err(t) <= (3m-2) ulps */

      /* now multiplies t by r^l/l! and adds to s */
      mpz_mul(t, t, rr);
      expt += expr;
      expt = mpz_normalize2(t, t, expt, *exps);
      /* err(t) <= (3m-1) + err_rr(l) <= (3m-2) + 2*l */
      MPFR_ASSERTD (expt == *exps);
      mpz_add(s, s, t); /* no error here */

      /* updates rr, the multiplication of the factors l+i could be done
         using binary splitting too, but it is not sure it would save much */
      mpz_mul(t, rr, R[m]); /* err(t) <= err(rr) + 2m-1 */
      expr += expR[m];
      /* tmp = (l+1)*(l+2)*...*(l+m), the divisor that turns l! into (l+m)! */
      mpz_set_ui (tmp, 1);
      for (i = 1 ; i <= m ; i++)
        mpz_mul_ui (tmp, tmp, l + i);
      mpz_fdiv_q(t, t, tmp); /* err(t) <= err(rr) + 2m */
      expr += mpz_normalize(rr, t, ql); /* err_rr(l+1) <= err_rr(l) + 2m+1 */
      /* precision to use for the next giant step */
      ql = q - *exps - mpz_sizeinbase(s, 2) + expr + mpz_sizeinbase(rr, 2);
      l += m;
    }
  /* NOTE(review): both sides are compared after conversion to size_t, so
     negative values wrap to large unsigned ones; the intent appears to be
     "continue while expr + bitsize(rr) > -q" -- confirm. */
  while ((size_t) expr+mpz_sizeinbase(rr, 2) > (size_t)((int)-q));

  TMP_FREE(marker);
  mpz_clear(tmp);
  return l;
}
void mout (const MINT *x) { mp_ptr xp; mp_srcptr x_ptr; mp_size_t x_size; unsigned char *str; size_t str_size; int i; TMP_DECL (marker); x_size = x->_mp_size; if (x_size == 0) { fputc ('0', stdout); fputc ('\n', stdout); return; } if (x_size < 0) { fputc ('-', stdout); x_size = -x_size; } TMP_MARK (marker); x_ptr = x->_mp_d; MPN_SIZEINBASE (str_size, x_ptr, x_size, 10); str_size += 2; str = (unsigned char *) TMP_ALLOC (str_size); /* mpn_get_str clobbers its argument */ xp = TMP_ALLOC_LIMBS (x_size); MPN_COPY (xp, x_ptr, x_size); str_size = mpn_get_str (str, 10, xp, x_size); /* mpn_get_str might make a leading zero, skip it. */ str_size -= (*str == 0); str += (*str == 0); ASSERT (*str != 0); /* Translate to printable chars. */ for (i = 0; i < str_size; i++) str[i] = "0123456789"[str[i]]; str[str_size] = 0; str_size = strlen ((char *) str); if (str_size % 10 != 0) { fwrite (str, 1, str_size % 10, stdout); str += str_size % 10; str_size -= str_size % 10; if (str_size != 0) fputc (' ', stdout); } for (i = 0; i < str_size; i += 10) { fwrite (str, 1, 10, stdout); str += 10; if (i + 10 < str_size) fputc (' ', stdout); } fputc ('\n', stdout); TMP_FREE (marker); }
/* Set y to exp(x) rounded according to rnd_mode, and return the value
   produced by the final mpfr_set into y.

   Uses Brent's formula exp(x) = (1+r+r^2/2!+r^3/3!+...)^(2^K)*2^n
   where x = n*log(2)+(2^K)*r
   together with the Brent-Kung O(t^(1/2)) algorithm for the evaluation of
   power series.  The resulting complexity is O(n^(1/3)*M(n)).

   For precy < SWITCH the series is summed by mpfr_exp2_aux, otherwise by
   mpfr_exp2_aux2.  The working precision q is raised by one limb and the
   whole computation repeated until mpfr_can_round accepts the result.  */
int
mpfr_exp_2 (mpfr_ptr y, mpfr_srcptr x, mp_rnd_t rnd_mode)
{
  long n;
  unsigned long K, k, l, err; /* FIXME: Which type ? */
  int error_r;
  mp_exp_t exps;
  mp_prec_t q, precy;
  int inexact;
  mpfr_t r, s;
  mpz_t ss;
  TMP_DECL(marker);

  precy = MPFR_PREC(y);

  MPFR_TRACE ( printf("Py=%d Px=%d", MPFR_PREC(y), MPFR_PREC(x)) );
  MPFR_TRACE ( MPFR_DUMP (x) );

  n = (long) (mpfr_get_d1 (x) / LOG2);

  /* error bounds the cancelled bits in x - n*log(2) */
  if (MPFR_UNLIKELY(n == 0))
    error_r = 0;
  else
    /* The cast applies to |n| as a whole, not to the comparison.  */
    count_leading_zeros (error_r, (mp_limb_t) ((n < 0) ? -n : n));
  error_r = BITS_PER_MP_LIMB - error_r + 2;

  /* for the O(n^(1/2)*M(n)) method, the Taylor series computation of
     n/K terms costs about n/(2K) multiplications when computed in fixed
     point */
  K = (precy < SWITCH) ? __gmpfr_isqrt ((precy + 1) / 2)
    : __gmpfr_cuberoot (4*precy);
  l = (precy - 1) / K + 1;
  err = K + MPFR_INT_CEIL_LOG2 (2 * l + 18);
  /* add K extra bits, i.e. failure probability <= 1/2^K = O(1/precy) */
  q = precy + err + K + 5;

  mpfr_init2 (r, q + error_r);
  mpfr_init2 (s, q + error_r);

  /* the algorithm consists in computing an upper bound of exp(x) using
     a precision of q bits, and see if we can round to MPFR_PREC(y) taking
     into account the maximal error. Otherwise we increase q. */
  for (;;)
    {
      MPFR_TRACE ( printf("n=%d K=%d l=%d q=%d\n",n,K,l,q) );

      /* if n<0, we have to get an upper bound of log(2)
         in order to get an upper bound of r = x-n*log(2) */
      mpfr_const_log2 (s, (n >= 0) ? GMP_RNDZ : GMP_RNDU);
      /* s is within 1 ulp of log(2) */

      mpfr_mul_ui (r, s, (n < 0) ? -n : n, (n >= 0) ? GMP_RNDZ : GMP_RNDU);
      /* r is within 3 ulps of n*log(2) */
      if (n < 0)
        mpfr_neg (r, r, GMP_RNDD); /* exact */
      /* r = floor(n*log(2)), within 3 ulps */

      MPFR_TRACE ( MPFR_DUMP (x) );
      MPFR_TRACE ( MPFR_DUMP (r) );

      mpfr_sub (r, x, r, GMP_RNDU);
      /* possible cancellation here: the error on r is at most
         3*2^(EXP(old_r)-EXP(new_r)) */
      while (MPFR_IS_NEG (r))
        { /* initial approximation n was too large */
          n--;
          mpfr_add (r, r, s, GMP_RNDU);
        }
      mpfr_prec_round (r, q, GMP_RNDU);
      MPFR_TRACE ( MPFR_DUMP (r) );
      MPFR_ASSERTD (MPFR_IS_POS (r));
      mpfr_div_2ui (r, r, K, GMP_RNDU); /* r = (x-n*log(2))/2^K, exact */

      /* ss lives in TMP space between TMP_MARK and TMP_FREE below.  */
      TMP_MARK(marker);
      MY_INIT_MPZ(ss, 3 + 2*((q-1)/BITS_PER_MP_LIMB));
      exps = mpfr_get_z_exp (ss, s);
      /* s <- 1 + r/1! + r^2/2! + ... + r^l/l! */
      l = (precy < SWITCH) ? mpfr_exp2_aux (ss, r, q, &exps) /* naive method */
        : mpfr_exp2_aux2 (ss, r, q, &exps); /* Brent/Kung method */

      MPFR_TRACE(printf("l=%d q=%d (K+l)*q^2=%1.3e\n", l, q, (K+l)*(double)q*q));

      /* Raise the series sum to the power 2^K by K successive squarings.  */
      for (k = 0; k < K; k++)
        {
          mpz_mul (ss, ss, ss);
          exps <<= 1;
          exps += mpz_normalize (ss, ss, q);
        }
      mpfr_set_z (s, ss, GMP_RNDN);

      MPFR_SET_EXP(s, MPFR_GET_EXP (s) + exps);
      TMP_FREE(marker); /* don't need ss anymore */

      if (n>0)
        mpfr_mul_2ui(s, s, n, GMP_RNDU);
      else
        mpfr_div_2ui(s, s, -n, GMP_RNDU);

      /* error is at most 2^K*(3l*(l+1)) ulp for mpfr_exp2_aux */
      l = (precy < SWITCH) ? 3*l*(l+1) : l*(l+4) ;
      k = MPFR_INT_CEIL_LOG2 (l);

      /* now k = ceil(log(error in ulps)/log(2)) */
      /* NOTE(review): K is not reset, so on a retry the enlarged K is also
         used as the scaling exponent and squaring count above -- confirm
         this is intended.  */
      K += k;

      MPFR_TRACE ( printf("after mult. by 2^n:\n") );
      MPFR_TRACE ( MPFR_DUMP (s) );
      MPFR_TRACE ( printf("err=%d bits\n", K) );

      if (mpfr_can_round (s, q - K, GMP_RNDN, GMP_RNDZ,
                          precy + (rnd_mode == GMP_RNDN)) )
        break;
      MPFR_TRACE (printf("prec++, use %d\n", q+BITS_PER_MP_LIMB) );
      MPFR_TRACE (printf("q=%d q-K=%d precy=%d\n",q,q-K,precy) );
      q += BITS_PER_MP_LIMB;
      mpfr_set_prec (r, q);
      mpfr_set_prec (s, q);
    }

  inexact = mpfr_set (y, s, rnd_mode);

  mpfr_clear (r);
  mpfr_clear (s);

  return inexact;
}
/* returns 0 if result exact, non-zero otherwise */ int mpfr_div_ui (mpfr_ptr y, mpfr_srcptr x, unsigned long int u, mp_rnd_t rnd_mode) { long int xn, yn, dif, sh, i; mp_limb_t *xp, *yp, *tmp, c, d; mp_exp_t exp; int inexact, middle = 1; TMP_DECL(marker); if (MPFR_UNLIKELY( MPFR_IS_SINGULAR(x) )) { if (MPFR_IS_NAN(x)) { MPFR_SET_NAN(y); MPFR_RET_NAN; } else if (MPFR_IS_INF(x)) { MPFR_SET_INF(y); MPFR_SET_SAME_SIGN(y, x); MPFR_RET(0); } else { MPFR_ASSERTD(MPFR_IS_ZERO(x)); if (u == 0)/* 0/0 is NaN */ { MPFR_SET_NAN(y); MPFR_RET_NAN; } else { MPFR_SET_ZERO(y); MPFR_RET(0); } } } if (MPFR_UNLIKELY(u == 0)) { /* x/0 is Inf */ MPFR_SET_INF(y); MPFR_SET_SAME_SIGN(y, x); MPFR_RET(0); } MPFR_CLEAR_FLAGS(y); MPFR_SET_SAME_SIGN(y, x); TMP_MARK(marker); xn = MPFR_LIMB_SIZE(x); yn = MPFR_LIMB_SIZE(y); xp = MPFR_MANT(x); yp = MPFR_MANT(y); exp = MPFR_GET_EXP (x); dif = yn + 1 - xn; /* we need to store yn+1 = xn + dif limbs of the quotient */ /* don't use tmp=yp since the mpn_lshift call below requires yp >= tmp+1 */ tmp = (mp_limb_t*) TMP_ALLOC((yn + 1) * BYTES_PER_MP_LIMB); c = (mp_limb_t) u; MPFR_ASSERTN(u == c); if (dif >= 0) c = mpn_divrem_1 (tmp, dif, xp, xn, c); /* used all the dividend */ else /* dif < 0 i.e. xn > yn, don't use the (-dif) low limbs from x */ c = mpn_divrem_1 (tmp, 0, xp - dif, yn + 1, c); inexact = (c != 0); /* First pass in estimating next bit of the quotient, in case of RNDN * * In case we just have the right number of bits (postpone this ?), * * we need to check whether the remainder is more or less than half * * the divisor. The test must be performed with a subtraction, so as * * to prevent carries. */ if (rnd_mode == GMP_RNDN) { if (c < (mp_limb_t) u - c) /* We have u > c */ middle = -1; else if (c > (mp_limb_t) u - c) middle = 1; else middle = 0; /* exactly in the middle */ } /* If we believe that we are right in the middle or exact, we should check that we did not neglect any word of x (division large / 1 -> small). 
*/ for (i=0; ((inexact == 0) || (middle == 0)) && (i < -dif); i++) if (xp[i]) inexact = middle = 1; /* larger than middle */ /* If the high limb of the result is 0 (xp[xn-1] < u), remove it. Otherwise, compute the left shift to be performed to normalize. In the latter case, we discard some low bits computed. They contain information useful for the rounding, hence the updating of middle and inexact. */ if (tmp[yn] == 0) { MPN_COPY(yp, tmp, yn); exp -= BITS_PER_MP_LIMB; sh = 0; } else { count_leading_zeros (sh, tmp[yn]); /* shift left to normalize */ if (sh) { mp_limb_t w = tmp[0] << sh; mpn_lshift (yp, tmp + 1, yn, sh); yp[0] += tmp[0] >> (BITS_PER_MP_LIMB - sh); if (w > (MPFR_LIMB_ONE << (BITS_PER_MP_LIMB - 1))) { middle = 1; } else if (w < (MPFR_LIMB_ONE << (BITS_PER_MP_LIMB - 1))) { middle = -1; } else { middle = (c != 0); } inexact = inexact || (w != 0); exp -= sh; } else { /* this happens only if u == 1 and xp[xn-1] >= 1<<(BITS_PER_MP_LIMB-1). It might be better to handle the u == 1 case seperately ? */ MPN_COPY (yp, tmp + 1, yn); } }
/* Fill RP with NBITS random bits drawn from the generator in RSTATE.

   Only GMP_RAND_ALG_LC is handled.  The linear congruential generator `lc'
   is called once per chunk of m2exp/2 bits; chunks are packed contiguously
   into RP, being shifted into place whenever the current bit position is
   not limb aligned.  If NBITS is not a multiple of GMP_NUMB_BITS and a
   final partial chunk is needed, the unused high bits of the top limb are
   cleared after that chunk is stored.  */
void
_gmp_rand (mp_ptr rp, gmp_randstate_t rstate, unsigned long int nbits)
{
  switch (rstate->_mp_alg)
    {
    case GMP_RAND_ALG_LC:
      {
        unsigned long int rbitpos;
        int chunk_nbits;
        mp_ptr tp;
        mp_size_t tn;
        TMP_DECL (lcmark);

        TMP_MARK (lcmark);

        chunk_nbits = rstate->_mp_algdata._mp_lc->_mp_m2exp / 2;
        tn = (chunk_nbits + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;

        /* Scratch limbs for one chunk.  */
        tp = (mp_ptr) TMP_ALLOC (tn * BYTES_PER_MP_LIMB);

        rbitpos = 0;
        while (rbitpos + chunk_nbits <= nbits)
          {
            mp_ptr r2p = rp + rbitpos / GMP_NUMB_BITS;

            if (rbitpos % GMP_NUMB_BITS != 0)
              {
                mp_limb_t savelimb, rcy;
                /* Target of new chunk is not bit aligned.  Use temp space
                   and align things by shifting it up.  */
                lc (tp, rstate);
                /* The low limb already holds bits from the previous chunk;
                   keep them and OR them back in after the shift.  */
                savelimb = r2p[0];
                rcy = mpn_lshift (r2p, tp, tn, rbitpos % GMP_NUMB_BITS);
                r2p[0] |= savelimb;
                /* NOTE(review): the condition below carries the marker
                   "bogus" in the original source; left unchanged.  */
                if ((chunk_nbits % GMP_NUMB_BITS + rbitpos % GMP_NUMB_BITS)
                    > GMP_NUMB_BITS)
                  r2p[tn] = rcy;
              }
            else
              {
                /* Target of new chunk is bit aligned.  Let `lc' put bits
                   directly into our target variable.  */
                lc (r2p, rstate);
              }
            rbitpos += chunk_nbits;
          }

        /* Handle last [0..chunk_nbits) bits.  */
        if (rbitpos != nbits)
          {
            mp_ptr r2p = rp + rbitpos / GMP_NUMB_BITS;
            int last_nbits = nbits - rbitpos;
            tn = (last_nbits + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;
            lc (tp, rstate);
            if (rbitpos % GMP_NUMB_BITS != 0)
              {
                mp_limb_t savelimb, rcy;
                /* Target of new chunk is not bit aligned.  Use temp space
                   and align things by shifting it up.  */
                savelimb = r2p[0];
                rcy = mpn_lshift (r2p, tp, tn, rbitpos % GMP_NUMB_BITS);
                r2p[0] |= savelimb;
                if (rbitpos + tn * GMP_NUMB_BITS - rbitpos % GMP_NUMB_BITS < nbits)
                  r2p[tn] = rcy;
              }
            else
              {
                MPN_COPY (r2p, tp, tn);
              }
            /* Mask off top bits if needed.  */
            if (nbits % GMP_NUMB_BITS != 0)
              rp[nbits / GMP_NUMB_BITS]
                &= ~ ((~(mp_limb_t) 0) << nbits % GMP_NUMB_BITS);
          }

        TMP_FREE (lcmark);
        break;
      }

    default:
      ASSERT (0);
      break;
    }
}