/* Put in rp[n..2n-1] an approximation of the n high limbs of {np, n}^2.
   The error is less than n ulps of rp[n]. */
void
mpfr_sqrhigh_n (mpfr_limb_ptr rp, mpfr_limb_srcptr np, mp_size_t n)
{
  mp_size_t k;

  MPFR_STAT_STATIC_ASSERT (MPFR_SQRHIGH_TAB_SIZE > 2); /* ensures k < n */
  /* Pick the split point k from the tuned table for small n, otherwise
     use (n+4)/2.  Special values: k = -1 selects a full square,
     k = 0 selects the basecase mulhigh. */
  k = MPFR_LIKELY (n < MPFR_SQRHIGH_TAB_SIZE) ? sqrhigh_ktab[n]
    : (n+4)/2; /* ensures that k >= (n+3)/2 */
  MPFR_ASSERTD (k == -1 || k == 0 || (k >= (n+4)/2 && k < n));
  if (k < 0)
    /* we can't use mpn_sqr_basecase here, since it requires
       n <= SQR_KARATSUBA_THRESHOLD, where SQR_KARATSUBA_THRESHOLD is
       not exported by GMP */
    mpn_sqr_n (rp, np, n);
  else if (k == 0)
    mpfr_mulhigh_n_basecase (rp, np, np, n);
  else
    {
      mp_size_t l = n - k;
      mp_limb_t cy;

      /* Exact square of the k high limbs, plus an approximate
         cross-product 2 * nhi * nlo folded in below. */
      mpn_sqr_n (rp + 2 * l, np + l, k);    /* fills rp[2l..2n-1] */
      mpfr_mulhigh_n (rp, np, np + k, l);   /* fills rp[l-1..2l-1] */
      /* {rp+n-1,l+1} += 2 * {rp+l-1,l+1} */
      cy = mpn_lshift (rp + l - 1, rp + l - 1, l + 1, 1);
      cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
      mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */
    }
}
/* Benchmark driver: times a separate mpn_add_n/mpn_sub_n pair against
   the combined mpn_addsub_n, for several degrees of operand overlap.
   argv[1] gives the operand size in limbs.
   Fix: the "r2 overlap" measurement previously repeated the exact
   "r1 overlap" call (r1p,r2p,r1p,s2p); it now overlaps r2 (s1p,r2p).
   Also validates argv and malloc results before use. */
int main (int argc, char **argv)
{
  mp_ptr r1p, r2p, s1p, s2p;
  double t;
  mp_size_t n;

  if (argc < 2)
    {
      printf ("usage: %s size_in_limbs\n", argv[0]);
      return 1;
    }
  n = strtol (argv[1], 0, 0);
  if (n <= 0)
    {
      printf ("invalid size: %s\n", argv[1]);
      return 1;
    }

  /* The source operands are deliberately left uninitialized: only the
     timing matters, not the limb values. */
  r1p = malloc (n * BYTES_PER_MP_LIMB);
  r2p = malloc (n * BYTES_PER_MP_LIMB);
  s1p = malloc (n * BYTES_PER_MP_LIMB);
  s2p = malloc (n * BYTES_PER_MP_LIMB);
  if (r1p == NULL || r2p == NULL || s1p == NULL || s2p == NULL)
    {
      printf ("out of memory\n");
      return 1;
    }

  TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r1p,s1p,s2p,n)));
  printf (" separate add and sub: %.3f\n", t);
  TIME (t,mpn_addsub_n(r1p,r2p,s1p,s2p,n));
  printf ("combined addsub separate variables: %.3f\n", t);
  TIME (t,mpn_addsub_n(r1p,r2p,r1p,s2p,n));
  printf (" combined addsub r1 overlap: %.3f\n", t);
  /* Overlap the subtrahend destination r2 with a source operand. */
  TIME (t,mpn_addsub_n(r1p,r2p,s1p,r2p,n));
  printf (" combined addsub r2 overlap: %.3f\n", t);
  TIME (t,mpn_addsub_n(r1p,r2p,r1p,r2p,n));
  printf (" combined addsub in-place: %.3f\n", t);

  free (r1p);
  free (r2p);
  free (s1p);
  free (s2p);
  return 0;
}
/* Compute the precomputed-inverse limb for the normalised two-limb
   divisor d = d1*B + d2 (B = 2^GMP_LIMB_BITS): effectively
   floor((B^3 - 1) / d) - B, obtained by dividing B^2 - 1 - d by
   d1 + 1 and then applying one correction step.
   NOTE(review): contract inferred from the arithmetic below -- confirm
   against the callers' use of the returned reciprocal. */
mp_limb_t div_preinv1(mp_limb_t d1, mp_limb_t d2)
{
   mp_limb_t q, r[2], p[2], cy;

   /* d == B^2 - 1: the reciprocal is exactly 0. */
   if (d2 + 1 == 0 && d1 + 1 == 0)
      return 0;

   /* Divide ~d1 : ~d2 (= B^2 - 1 - d) by d1 + 1; when d1 + 1 wraps to
      0 the quotient is simply ~d1 with remainder ~d2. */
   if (d1 + 1 == 0)
      q = ~d1, r[1] = ~d2;
   else
      udiv_qrnnd(q, r[1], ~d1, ~d2, d1 + 1);

   r[0] = 0;
   /* Add back q * (B - (d2 + 1)) into the two-limb remainder; cy
      records a carry out of the top limb. */
   if (d2 + 1 == 0)
      add_ssaaaa(cy, r[1], 0, r[1], 0, q);
   else
   {
      umul_ppmm(p[1], p[0], q, ~d2 - 1);
      cy = mpn_add_n(r, r, p, 2);
   }

   /* Final correction: if the remainder reached d + 1, bump q once. */
   p[0] = d2 + 1, p[1] = d1 + (d2 + 1 == 0);
   if (cy || mpn_cmp(r, p, 2) >= 0)
      q++;

   return q;
}
/* One truncated FFT layer with twiddles over 2n coefficients
   ii[0], ii[is], ..., each of limbs+1 limbs, working modulo
   B^limbs + 1 with root of unity 2^w.  Only the first trunc outputs
   are required, allowing sub-transforms past trunc to be skipped.
   t1/t2 are scratch coefficient pointers (swapped, not copied); r, c,
   rs index the twiddle factors for the recursive calls. */
void mpir_fft_trunc1_twiddle(mp_ptr * ii, mp_size_t is,
      mp_size_t n, mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2,
      mp_size_t ws, mp_size_t r, mp_size_t c, mp_size_t rs, mp_size_t trunc)
{
   mp_size_t i;
   mp_size_t limbs = (w*n)/GMP_LIMB_BITS;

   if (trunc == 2*n)
      /* Nothing is truncated: fall back to the full radix-2 layer. */
      mpir_fft_radix2_twiddle(ii, is, n, w, t1, t2, ws, r, c, rs);
   else if (trunc <= n)
   {
      /* All wanted outputs come from the "sum" half: fold the top half
         into the bottom half and recurse on n coefficients only. */
      for (i = 0; i < n; i++)
         mpn_add_n(ii[i*is], ii[i*is], ii[(i+n)*is], limbs + 1);

      mpir_fft_trunc1_twiddle(ii, is, n/2, 2*w, t1, t2, ws, r, c, 2*rs, trunc);
   } else
   {
      /* Outputs straddle both halves: do a full butterfly layer, a full
         transform on the low half, and a truncated one on the high half. */
      for (i = 0; i < n; i++)
      {
         mpir_fft_butterfly(*t1, *t2, ii[i*is], ii[(n+i)*is], i, limbs, w);

         MP_PTR_SWAP(ii[i*is], *t1);
         MP_PTR_SWAP(ii[(n+i)*is], *t2);
      }

      mpir_fft_radix2_twiddle(ii, is, n/2, 2*w, t1, t2, ws, r, c, 2*rs);
      mpir_fft_trunc1_twiddle(ii + n*is, is, n/2, 2*w, t1, t2, ws, r + rs, c, 2*rs, trunc - n);
   }
}
/* Montgomery reduction (REDC), two limbs per iteration: reduce
   {up, 2n} modulo the odd modulus {mp, n}, writing the n-limb result
   to {rp, n}.  mip points at the two low limbs of the precomputed
   inverse used by umul2low (presumably -1/m mod B^2 -- confirm
   against the caller). */
void
mpn_redc_2 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr mip)
{
  mp_limb_t q[2];
  mp_size_t j;
  mp_limb_t upn;
  mp_limb_t cy;

  ASSERT_MPN (up, 2*n);

  /* For odd n, do one single-limb reduction step first so the main
     loop can always advance by two limbs. */
  if ((n & 1) != 0)
    {
      up[0] = mpn_addmul_1 (up, mp, n, (up[0] * mip[0]) & GMP_NUMB_MASK);
      up++;
    }

  for (j = n - 2; j >= 0; j -= 2)
    {
      /* q = two quotient limbs chosen so that up[0..1] become zero
         after adding q * m. */
      umul2low (q[1], q[0], mip[1], mip[0], up[1], up[0]);
      upn = up[n]; /* mpn_addmul_2 overwrites this */
      up[1] = mpn_addmul_2 (up, mp, n, q);
      up[0] = up[n];
      up[n] = upn;
      up += 2;
    }

  /* Fold the two accumulated carry vectors together; a final carry
     means the result is >= B^n, so subtract m once. */
  cy = mpn_add_n (rp, up, up - n, n);
  if (cy != 0)
    mpn_sub_n (rp, rp, mp, n);
}
/* Compute e^x by summing the Taylor series sum(x^n / n!) in fixed-point
   mpn arithmetic.  ex receives the result; x is the (fixed-point)
   argument with FRAC fractional bits.  The loop stops once the current
   term drops below the tolerance 2^(FRAC - TOL). */
static void
exp_mpn (mp1 ex, mp1 x)
{
  unsigned int n;
  mp1 xp;     /* current term x^n / n! */
  mp2 tmp;    /* double-width product scratch */
  mp_limb_t chk;
  mp1 tol;    /* termination threshold */

  memset (xp, 0, sizeof (mp1));
  memset (ex, 0, sizeof (mp1));
  /* xp = 1.0 in fixed point (term for n = 0). */
  xp[FRAC / mpbpl] = (mp_limb_t)1 << FRAC % mpbpl;
  memset (tol, 0, sizeof (mp1));
  tol[(FRAC - TOL) / mpbpl] = (mp_limb_t)1 << (FRAC - TOL) % mpbpl;

  n = 0;

  do
    {
      /* Calculate sum(x^n/n!) until the next term is sufficiently small. */
      /* next term = previous term * x / n; the shift by FRAC limbs
         renormalises the fixed-point product. */
      mpn_mul_n (tmp, xp, x, SZ);
      assert(tmp[SZ * 2 - 1] == 0);  /* product must not overflow */
      if (n > 0)
	mpn_divmod_1 (xp, tmp + FRAC / mpbpl, SZ, n);
      chk = mpn_add_n (ex, ex, xp, SZ);
      assert (chk == 0);  /* running sum must not overflow either */
      ++n;
      assert (n < 80); /* Catch too-high TOL. */
    }
  /* Force at least 10 terms before trusting the tolerance test. */
  while (n < 10 || mpn_cmp (xp, tol, SZ) >= 0);
}
/* Truncate an n-limb approximate inverse {xp, n} of {ap, n} down to its
   m most significant limbs, then correct the truncation so that x_new
   again satisfies B^(2m) - X*A <= A for the high m limbs of A.
   NOTE(review): contract inferred from the correction loop below --
   confirm against the callers.
   Change: dropped the declared-but-unused local `cy`. */
void mpn_invert_trunc(mp_ptr x_new, mp_size_t m,
                      mp_srcptr xp, mp_size_t n, mp_srcptr ap)
{
   mp_ptr tp;
   TMP_DECL;

   TMP_MARK;
   tp = TMP_ALLOC_LIMBS (2 * m);

   /* Keep only the m top limbs of the inverse and of A. */
   MPN_COPY(x_new, xp + n - m, m);
   ap += (n - m);

   /* {tp, 2m} = X * A, plus A shifted up m limbs for the implicit top
      bit of X.  NOTE(review): the carry out of this add is ignored --
      assumed not to occur for valid inputs; confirm. */
   mpn_mul_n (tp, x_new, ap, m);
   mpn_add_n (tp + m, tp + m, ap, m); /* A * msb(X) */

   /* now check B^(2n) - X*A <= A */
   mpn_not (tp, 2 * m);
   mpn_add_1 (tp, tp, 2 * m, 1); /* B^(2m) - X*A */

   /* Increment X until the residual B^(2m) - X*A drops to at most A. */
   while (tp[m] || mpn_cmp (tp, ap, m) > 0)
   {
      mpn_add_1(x_new, x_new, m, 1);
      tp[m] -= mpn_sub_n(tp, tp, ap, m);
   }

   TMP_FREE;
}
/* Check that {xp, n} is a correct floor inverse of {ap, n}:
   X*A < B^(2n) and B^(2n) - X*A <= A (the "msb(X)" term accounts for
   the implicit top bit of X).  Returns 1 on success, 0 on failure.
   Fixes: the early failure return previously skipped TMP_FREE,
   leaking the temporary allocation; the unused index `i` is gone. */
int
test_invert (mp_ptr xp, mp_srcptr ap, mp_size_t n)
{
  int res;
  mp_ptr tp, up;
  mp_limb_t cy;
  TMP_DECL;

  TMP_MARK;
  tp = TMP_ALLOC_LIMBS (2 * n);
  up = TMP_ALLOC_LIMBS (2 * n);

  /* first check X*A < B^(2*n) */
  mpn_mul_n (tp, xp, ap, n);
  cy = mpn_add_n (tp + n, tp + n, ap, n); /* A * msb(X) */
  if (cy != 0)
    res = 0;  /* product overflowed B^(2n): X is too large */
  else
    {
      /* now check B^(2n) - X*A <= A */
      mpn_com_n (tp, tp, 2 * n);
      mpn_add_1 (tp, tp, 2 * n, 1); /* B^(2n) - X*A */

      MPN_ZERO (up, 2 * n);
      MPN_COPY (up, ap, n);

      res = mpn_cmp (tp, up, 2 * n) <= 0;
    }

  TMP_FREE;
  return res;
}
/* Divide-and-conquer division of {np, 2n} by the normalised divisor
   {dp, n}: quotient to {qp, n} (high quotient limb returned), remainder
   left in {np, n}.  Splits into two 3-by-2 sub-divisions; the odd-n
   case peels off one limb and finishes with a schoolbook step. */
static mp_limb_t
mpn_dc_div_2_by_1 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
{
  mp_limb_t qhl, cc;
  mp_size_t n2 = n/2;

  if (n % 2 != 0)
    {
      /* Odd n: divide by the n-1 high limbs of d first (quotient goes
         to qp+1), then account for the ignored low limb dp[0]. */
      mp_ptr qp1 = qp + 1;

      qhl = mpn_dc_div_3_by_2 (qp1 + n2, np + 2 + n2, dp + 1, n2, scratch);
      qhl += mpn_add_1 (qp1 + n2, qp1 + n2, n2,
			mpn_dc_div_3_by_2 (qp1, np + 2, dp + 1, n2, scratch));

      /* Subtract q * dp[0]; a borrow means q was too large. */
      cc = mpn_submul_1 (np + 1, qp1, n - 1, dp[0]);
      cc = mpn_sub_1 (np + n, np + n, 1, cc);
      if (qhl != 0) cc += mpn_sub_1 (np + n, np + n, 1, dp[0]);

      /* Fix up: decrement q and add d back until the remainder is
         non-negative again. */
      while (cc != 0)
	{
	  qhl -= mpn_sub_1 (qp1, qp1, n - 1, (mp_limb_t) 1);
	  cc -= mpn_add_n (np + 1, np + 1, dp, n);
	}

      /* Final schoolbook step produces the low quotient limb. */
      qhl += mpn_add_1 (qp1, qp1, n - 1,
			mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
    }
  else
    {
      /* Even n: two clean 3-by-2 sub-divisions. */
      qhl = mpn_dc_div_3_by_2 (qp + n2, np + n2, dp, n2, scratch);
      qhl += mpn_add_1 (qp + n2, qp + n2, n2,
			mpn_dc_div_3_by_2 (qp, np, dp, n2, scratch));
    }
  return qhl;
}
/* Simultaneously form rm = |rp - rs| and rp = rp + rs (both n limbs).
   Returns the sign flag produced by abs_sub_n for the difference. */
static int
abs_sub_add_n (mp_ptr rm, mp_ptr rp, mp_srcptr rs, mp_size_t n)
{
  /* Magnitude of the difference first, capturing its sign ... */
  int sign = abs_sub_n (rm, rp, rs, n);

  /* ... then the sum in place; same-size operands cannot carry here. */
  ASSERT_NOCARRY (mpn_add_n (rp, rp, rs, n));

  return sign;
}
/* r <- a - b in the prime field: limb-wise subtract, then add the
   modulus back if the subtraction borrowed. */
static void fp_sub(element_ptr r, element_ptr a, element_ptr b) {
  fp_field_data_ptr fdata = r->field->data;
  size_t nlimbs = fdata->limbs;
  mp_limb_t borrow;

  borrow = mpn_sub_n(r->data, a->data, b->data, nlimbs);
  if (borrow) {
    /* Went negative: wrap around modulo the field prime. */
    mpn_add_n(r->data, r->data, fdata->primelimbs, nlimbs);
  }
}
/* Recursive divide-and-conquer division: divide {np, 2n} by the
   normalised divisor {dp, n}.  The n low quotient limbs go to {qp, n},
   the n-limb remainder stays in {np, n}, and the high quotient limb
   (0 or 1) is returned.  dinv holds the precomputed 3/2 inverse of d's
   top limbs; tp is n limbs of scratch for the partial products. */
mp_limb_t
mpn_dcpi1_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
                    gmp_pi1_t *dinv, mp_ptr tp)
{
  mp_size_t lo, hi;
  mp_limb_t cy, qh, ql;

  lo = n >> 1;			/* floor(n/2) */
  hi = n - lo;			/* ceil(n/2) */

  /* High quotient half: divide the top 2*hi limbs by the hi high
     limbs of d, recursing while above the threshold. */
  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
    qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
  else
    qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);

  /* Subtract q1 times the lo low limbs of d that the sub-division
     ignored; a borrow means q1 overshot. */
  mpn_mul (tp, qp + lo, hi, dp, lo);

  cy = mpn_sub_n (np + lo, np + lo, tp, n);
  if (qh != 0)
    cy += mpn_sub_n (np + n, np + n, dp, lo);

  /* Fix up: decrement q1 and add d back until non-negative. */
  while (cy != 0)
    {
      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
      cy -= mpn_add_n (np + lo, np + lo, dp, n);
    }

  /* Low quotient half on the updated partial remainder. */
  if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD))
    ql = mpn_sbpi1_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
  else
    ql = mpn_dcpi1_div_qr_n (qp, np + hi, dp + hi, lo, dinv, tp);

  /* Same correction for q0 against the hi low limbs of d. */
  mpn_mul (tp, dp, hi, qp, lo);

  cy = mpn_sub_n (np, np, tp, n);
  if (ql != 0)
    cy += mpn_sub_n (np + lo, np + lo, dp, hi);

  while (cy != 0)
    {
      mpn_sub_1 (qp, qp, lo, 1);
      cy -= mpn_add_n (np, np, dp, n);
    }

  return qh;
}
/* Karatsuba combination step: on entry {rp, 2n} holds the two outer
   products (low product in the bottom 2*n2 limbs, high product in the
   top 2*n3 limbs) and {tp, ...} holds the middle product; the code
   folds tp into rp at offset n2 with the Karatsuba sign pattern
   (high + low - middle).  NOTE(review): exact operand roles inferred
   from the arithmetic -- confirm against the calling multiply.
   The trailing //-comments record the 2*n3-sized variants of each call. */
static void mpn_karasub(mp_ptr rp, mp_ptr tp, mp_size_t n)
{
  mp_size_t n2, n3;
  mp_limb_t c1, c2, c3, top[2];

  n2 = n >> 1;   /* floor(n/2) */
  n3 = n - n2;   /* ceil(n/2); n3 = n2 or n2+1 */

  /* tp = high - tp (borrow in c1), then tp += low (carry in c2). */
  c1 = mpn_sub_n(tp, rp + 2 * n2, tp, 2 * n2); //c1=mpn_sub_n(tp,rp+2*n2,tp,2*n3);
  c2 = mpn_add_n(tp, tp, rp, 2 * n2);
  /* rp += tp * B^n2 (carry in c3). */
  c3 = mpn_add_n(rp + n2, rp + n2, tp, 2 * n2); //c3=mpn_add_n(rp+n2,rp+n2,tp,2*n3);
  /* Save the top two limbs before the carry fix-ups below clobber
     the region above 3*n2. */
  top[1] = rp[2 * n2 + 2 * n3 - 1];
  top[0] = rp[2 * n2 + 2 * n3 - 2];
  /* Propagate the three carries/borrows at position 3*n2. */
  mpn_incr_u(rp + 3 * n2, c3); //mpn_incr_u(rp+n2+2*n3,c3);
  mpn_incr_u(rp + 3 * n2, c2);
  mpn_decr_u(rp + 3 * n2, c1); //mpn_decr_u(rp+n2+2*n3,c1);
  if (n2 == n3)
    return;
  /* Odd n: splits differ by one limb, so patch the two limbs at 3*n2
     using the saved top limbs and the untouched tail of tp. */
  c1 = mpn_sub_n(rp + 3 * n2, rp + 3 * n2, tp + 2 * n2, 2);
  c2 = mpn_add_n(rp + 3 * n2, rp + 3 * n2, top, 2);
  if (c2 == 1 && c1 == 0)
    mpn_incr_u(rp + 3 * n2 + 2, 1);
  if (c2 == 0 && c1 == 1)
    mpn_decr_u(rp + 3 * n2 + 2, 1);
  return;
}
/* Set ln = L[n] and lnsub1 = L[n-1] (consecutive Lucas numbers),
   derived from the Fibonacci pair F[n], F[n-1] via
   L[n] = F[n] + 2 F[n-1] and L[n-1] = 2 F[n] - F[n-1]. */
void
mpz_lucnum2_ui (mpz_ptr ln, mpz_ptr lnsub1, unsigned long n)
{
  mp_ptr     lp, l1p, f1p;
  mp_size_t  size;
  mp_limb_t  c;
  TMP_DECL;

  ASSERT (ln != lnsub1);

  /* handle small n quickly, and hide the special case for L[-1]=-1 */
  if (n <= FIB_TABLE_LUCNUM_LIMIT)
    {
      mp_limb_t  f  = FIB_TABLE (n);
      mp_limb_t  f1 = FIB_TABLE ((int) n - 1);

      /* L[n] = F[n] + 2F[n-1] */
      PTR(ln)[0] = f + 2*f1;
      SIZ(ln) = 1;

      /* L[n-1] = 2F[n] - F[n-1], but allow for L[-1]=-1 */
      PTR(lnsub1)[0] = (n == 0 ? 1 : 2*f - f1);
      SIZ(lnsub1) = (n == 0 ? -1 : 1);

      return;
    }

  TMP_MARK;

  size = MPN_FIB2_SIZE (n);
  f1p = TMP_ALLOC_LIMBS (size);

  /* One extra limb in each destination for the possible carry. */
  MPZ_REALLOC (ln, size+1);
  MPZ_REALLOC (lnsub1, size+1);
  lp  = PTR(ln);
  l1p = PTR(lnsub1);

  /* l1p = F[n], f1p = F[n-1]; returned size is shared. */
  size = mpn_fib2_ui (l1p, f1p, n);

  /* L[n] = F[n] + 2F[n-1] */
#if HAVE_NATIVE_mpn_addlsh1_n
  c = mpn_addlsh1_n (lp, l1p, f1p, size);
#else
  c = mpn_lshift1 (lp, f1p, size);
  c += mpn_add_n (lp, lp, l1p, size);
#endif
  lp[size] = c;
  SIZ(ln) = size + (c != 0);

  /* L[n-1] = 2F[n] - F[n-1] */
  c = mpn_double (l1p, size);
  c -= mpn_sub_n (l1p, l1p, f1p, size);
  ASSERT ((mp_limb_signed_t) c >= 0);  /* 2F[n] >= F[n-1], no net borrow */
  l1p[size] = c;
  SIZ(lnsub1) = size + (c != 0);

  TMP_FREE;
}
/* "Even" sign test for a field element: compares 2a against the prime
   p.  Returns 0 when a == 0; a positive value when 2a overflows the
   limb array (so 2a > p); otherwise the result of mpn_cmp(2a, p),
   which is negative/zero/positive per mpn_cmp's convention (not
   normalised to +-1).
   NOTE(review): _alloca is compiler-specific (MSVC); confirm the
   build environment provides it. */
static int fp_sgn_even(element_ptr a) {
  fp_field_data_ptr p = a->field->data;
  if (fp_is0(a)) return 0;
  /* sum = 2a, computed as a + a. */
  mp_limb_t *sum = _alloca(p->limbs * sizeof(mp_limb_t));
  int carry = mpn_add_n(sum, a->data, a->data, p->limbs);
  /* Carry out means 2a >= B^limbs > p. */
  if (carry) return 1;
  return mpn_cmp(sum, p->primelimbs, p->limbs);
}
/* Reference 2x2 matrix product R = A * B using four plain mpn_mul
   calls per entry-pair.  When B's entries are larger than A's, the
   computation is transposed (R^T = B^T * A^T) so that mpn_mul always
   sees its larger operand first.  tp is scratch for one n-limb
   product; R entries get n+1 limbs (top limb is the add carry). */
static void
ref_matrix22_mul (struct matrix *R,
		  const struct matrix *A,
		  const struct matrix *B, mp_ptr tp)
{
  mp_size_t an, bn, n;
  mp_ptr r00, r01, r10, r11, a00, a01, a10, a11, b00, b01, b10, b11;

  if (A->n >= B->n)
    {
      r00 = R->e00; a00 = A->e00; b00 = B->e00;
      r01 = R->e01; a01 = A->e01; b01 = B->e01;
      r10 = R->e10; a10 = A->e10; b10 = B->e10;
      r11 = R->e11; a11 = A->e11; b11 = B->e11;
      an = A->n, bn = B->n;
    }
  else
    {
      /* Transpose */
      r00 = R->e00; a00 = B->e00; b00 = A->e00;
      r01 = R->e10; a01 = B->e10; b01 = A->e10;
      r10 = R->e01; a10 = B->e01; b10 = A->e01;
      r11 = R->e11; a11 = B->e11; b11 = A->e11;
      an = B->n, bn = A->n;
    }
  n = an + bn;
  R->n = n + 1;

  /* r00 = a00*b00 + a01*b10, carry into limb n; same pattern below. */
  mpn_mul (r00, a00, an, b00, bn);
  mpn_mul (tp, a01, an, b10, bn);
  r00[n] = mpn_add_n (r00, r00, tp, n);

  mpn_mul (r01, a00, an, b01, bn);
  mpn_mul (tp, a01, an, b11, bn);
  r01[n] = mpn_add_n (r01, r01, tp, n);

  mpn_mul (r10, a10, an, b00, bn);
  mpn_mul (tp, a11, an, b10, bn);
  r10[n] = mpn_add_n (r10, r10, tp, n);

  mpn_mul (r11, a10, an, b01, bn);
  mpn_mul (tp, a11, an, b11, bn);
  r11[n] = mpn_add_n (r11, r11, tp, n);
}
/* r <- a + b in the prime field: limb-wise add, then reduce once if
   the sum overflowed or reached the modulus. */
static void fp_add(element_ptr r, element_ptr a, element_ptr b) {
  fp_field_data_ptr fdata = r->field->data;
  const size_t nlimbs = fdata->limbs;
  mp_limb_t overflow = mpn_add_n(r->data, a->data, b->data, nlimbs);

  /* A single conditional subtraction suffices since a, b < p. */
  if (overflow || mpn_cmp(r->data, fdata->primelimbs, nlimbs) >= 0) {
    mpn_sub_n(r->data, r->data, fdata->primelimbs, nlimbs);
  }
}
/* rp <- ap + bp mod p (size ecc->size), result possibly only partially
   reduced: any carry is folded back in via Bmodp = B^size - p.  Two
   conditional folds cover the worst case; the final carry must be 0. */
void
ecc_modp_add (const struct ecc_curve *ecc, mp_limb_t *rp,
	      const mp_limb_t *ap, const mp_limb_t *bp)
{
  mp_limb_t carry = mpn_add_n (rp, ap, bp, ecc->size);

  /* First fold may itself carry; the second fold absorbs it. */
  carry = cnd_add_n (carry, rp, ecc->Bmodp, ecc->size);
  carry = cnd_add_n (carry, rp, ecc->Bmodp, ecc->size);

  assert (carry == 0);
}
/* Low-half product: {rp, n} = low n limbs of {xp, n} * {yp, n}.
   Dispatches on n: full basecase multiply, dedicated mullow basecase,
   divide-and-conquer, or (beyond this excerpt) a full mul_n.
   NOTE(review): this definition continues beyond the visible excerpt
   (the final `else` branch is truncated here). */
void
mpn_mullow_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
  if (BELOW_THRESHOLD (n, MULLOW_BASECASE_THRESHOLD))
    {
      /* Allocate workspace of fixed size on stack: fast! */
      mp_limb_t ws[MUL_BASECASE_ALLOC];
      mpn_mul_basecase (ws, xp, n, yp, n);
      MPN_COPY (rp, ws, n);
    }
  else if (BELOW_THRESHOLD (n, MULLOW_DC_THRESHOLD))
    {
      mpn_mullow_basecase (rp, xp, yp, n);
    }
  else if (BELOW_THRESHOLD (n, MULLOW_MUL_N_THRESHOLD))
    {
      /* Divide-and-conquer */
      mp_size_t n2 = n >> 1;		/* floor(n/2) */
      mp_size_t n1 = n - n2;		/* ceil(n/2) */
      mp_ptr tp;
      TMP_SDECL;
      TMP_SMARK;
      tp = TMP_SALLOC_LIMBS (n1);

      /* Split as x = x1 2^(n1 GMP_NUMB_BITS) + x0,
	 y = y1 2^(n2 GMP_NUMB_BITS) + y0 */

      /* x0 * y0 */
      mpn_mul_n (rp, xp, yp, n2);
      if (n1 != n2)
	rp[2 * n2] = mpn_addmul_1 (rp + n2, yp, n2, xp[n2]);

      /* x1 * y0 * 2^(n1 GMP_NUMB_BITS) */
      mpn_mullow_n (tp, xp + n1, yp, n2);
      mpn_add_n (rp + n1, rp + n1, tp, n2);

      /* x0 * y1 * 2^(n2 GMP_NUMB_BITS) */
      mpn_mullow_n (tp, yp + n2, xp, n1);
      mpn_add_n (rp + n2, rp + n2, tp, n1);
      TMP_SFREE;
    }
  else
    {
/* 2x2 matrix multiply-assign: replace the rn-limb matrix
   (r0 r1; r2 r3) by its product with the mn-limb matrix
   (m0 m1; m2 m3); result entries get rn+mn+1 limbs.  Small sizes use
   the straightforward 8-multiply schoolbook loop (one matrix row per
   iteration), larger ones Strassen.  tp supplies scratch. */
void
mpn_matrix22_mul (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn,
		  mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn,
		  mp_ptr tp)
{
  if (BELOW_THRESHOLD (rn, MATRIX22_STRASSEN_THRESHOLD)
      || BELOW_THRESHOLD (mn, MATRIX22_STRASSEN_THRESHOLD))
    {
      mp_ptr p0, p1;
      unsigned i;

      /* Temporary storage: 3 rn + 2 mn */
      p0 = tp + rn;
      p1 = p0 + rn + mn;

      for (i = 0; i < 2; i++)
	{
	  /* Save r0: it is overwritten before its last use.
	     New row is (r0*m0 + r1*m2, r0*m1 + r1*m3); the two mpn_mul
	     branches just keep the larger operand first. */
	  MPN_COPY (tp, r0, rn);

	  if (rn >= mn)
	    {
	      mpn_mul (p0, r0, rn, m0, mn);
	      mpn_mul (p1, r1, rn, m3, mn);
	      mpn_mul (r0, r1, rn, m2, mn);
	      mpn_mul (r1, tp, rn, m1, mn);
	    }
	  else
	    {
	      mpn_mul (p0, m0, mn, r0, rn);
	      mpn_mul (p1, m3, mn, r1, rn);
	      mpn_mul (r0, m2, mn, r1, rn);
	      mpn_mul (r1, m1, mn, tp, rn);
	    }
	  r0[rn+mn] = mpn_add_n (r0, r0, p0, rn + mn);
	  r1[rn+mn] = mpn_add_n (r1, r1, p1, rn + mn);

	  /* Second pass handles the bottom row. */
	  r0 = r2;
	  r1 = r3;
	}
    }
  else
    mpn_matrix22_mul_strassen (r0, r1, r2, r3, rn,
			       m0, m1, m2, m3, mn, tp);
}
/* dst[0..n-1] += src[0..n-1] << s, returning the carry limb.
   ws is n limbs of scratch for the shifted copy.  (The mpn_addmul_1
   path is kept but disabled by the `&& 0`, matching the original.) */
static mp_limb_t
DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
{
#if USE_MUL_1 && 0
  return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
#else
  /* Shift into scratch, then accumulate; both steps can carry. */
  mp_limb_t carry = mpn_lshift(ws, src, n, s);
  carry += mpn_add_n(dst, dst, ws, n);
  return carry;
#endif
}
/* Reference multiply {wp, un+vn} = {up, un} * {vp, vn} (un >= vn),
   built from independent algorithms so it can cross-check mpn_mul:
   small sizes use a private mul_basecase, mid sizes Karatsuba, large
   sizes Toom-3, with the un > vn remainder handled by recursion on
   the high part of u. */
static void
ref_mpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
{
  mp_ptr tp;
  mp_size_t tn;
  mp_limb_t cy;

  if (vn < TOOM3_THRESHOLD)
    {
      /* In the mpn_mul_basecase and mpn_kara_mul_n range, use our own
	 mul_basecase.  */
      if (vn != 0)
	mul_basecase (wp, up, un, vp, vn);
      else
	MPN_ZERO (wp, un);
      return;
    }

  if (vn < FFT_THRESHOLD)
    {
      /* In the mpn_toom3_mul_n and mpn_toom4_mul_n range, use
	 mpn_kara_mul_n.  */
      tn = 2 * vn + MPN_KARA_MUL_N_TSIZE (vn);
      tp = __GMP_ALLOCATE_FUNC_LIMBS (tn);
      mpn_kara_mul_n (tp, up, vp, vn, tp + 2 * vn);
    }
  else
    {
      /* Finally, for the largest operands, use mpn_toom3_mul_n.  */
      /* The "- 63 + 255" tweaks the allocation to allow for huge
	 operands.  See the definition of this macro in gmp-impl.h to
	 understand this.  */
      tn = 2 * vn + MPN_TOOM3_MUL_N_TSIZE (vn) - 63 + 255;
      tp = __GMP_ALLOCATE_FUNC_LIMBS (tn);
      mpn_toom3_mul_n (tp, up, vp, vn, tp + 2 * vn);
    }

  if (un != vn)
    {
      /* {tp, 2vn} holds ulow * v; multiply the remaining high part of
	 u recursively (larger operand first) and stitch the two
	 partial products together. */
      if (un - vn < vn)
	ref_mpn_mul (wp + vn, vp, vn, up + vn, un - vn);
      else
	ref_mpn_mul (wp + vn, up + vn, un - vn, vp, vn);

      MPN_COPY (wp, tp, vn);
      cy = mpn_add_n (wp + vn, wp + vn, tp + vn, vn);
      mpn_incr_u (wp + 2 * vn, cy);
    }
  else
    {
      MPN_COPY (wp, tp, 2 * vn);
    }

  __GMP_FREE_FUNC_LIMBS (tp, tn);
}
/* Signed addition of two n-limb magnitudes with sign flags as/bs
   (0 = positive, 1 = negative): rp gets the result magnitude, the
   return value its sign flag. */
static int
add_signed_n (mp_ptr rp, mp_srcptr ap, int as, mp_srcptr bp, int bs,
	      mp_size_t n)
{
  if (as == bs)
    {
      /* Same sign: magnitudes simply add, sign is unchanged. */
      ASSERT_NOCARRY (mpn_add_n (rp, ap, bp, n));
      return as;
    }

  /* Opposite signs: magnitude is |a - b|; abs_sub_n reports whether
     the operands were swapped, which flips the sign accordingly. */
  return as ^ abs_sub_n (rp, ap, bp, n);
}
/* r <- a/2 in the prime field.  Even a is shifted right directly; odd
   a is first made even by adding p (a + p is even since the field
   prime is odd), with the add's carry re-inserted as the top bit after
   the shift. */
static void fp_halve(element_ptr r, element_ptr a) {
  fp_field_data_ptr p = r->field->data;
  const size_t t = p->limbs;
  int carry = 0;
  mp_limb_t *alimb = a->data;
  mp_limb_t *rlimb = r->data;

  if (alimb[0] & 1)
    carry = mpn_add_n(rlimb, alimb, p->primelimbs, t);
  else
    fp_set(r, a);  /* copy a into r, then halve in place */

  mpn_rshift(rlimb, rlimb, t, 1);
  if (carry)
    /* NOTE(review): assumes full machine-word limbs (no nail bits). */
    rlimb[t - 1] |= ((mp_limb_t) 1) << (sizeof(mp_limb_t) * 8 - 1);
}
/* rp[0..n-1] += up[0..n-1] * limb; returns the out carry.
   Vectorised variant: GMPN_MULWW forms all double-limb products at
   once (high halves in p1, low halves in p0), which are then folded
   into rp with two mpn_add_n passes. */
mp_limb_t
mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t limb)
{
  mp_ptr p0, p1, tp;
  mp_limb_t cy_limb;
  TMP_DECL (marker);
  TMP_MARK (marker);
  p1 = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
  p0 = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
  tp = TMP_ALLOC (n * BYTES_PER_MP_LIMB);

  /* p1[i] : p0[i] = up[i] * limb for all i. */
  GMPN_MULWW (p1, p0, up, &n, &limb);

  /* rp += p0, then the high halves p1 enter one limb higher; all
     three carries surface at position n and sum into the result. */
  cy_limb = mpn_add_n (tp, rp, p0, n);
  rp[0] = tp[0];
  cy_limb += mpn_add_n (rp + 1, tp + 1, p1, n - 1);
  cy_limb += p1[n - 1];

  TMP_FREE (marker);
  return cy_limb;
}
/* Verify one division result: recompute Q*D (+ R when given) and
   compare against N = {np, nn}; on mismatch dump all operands and
   abort.  rh is the expected carry/high limb when R is supplied.
   `test` is a file-scope counter identifying the failing iteration
   (defined elsewhere in this file). */
void
check_one (mp_ptr qp, mp_srcptr rp, mp_limb_t rh, mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, const char *fname)
{
  mp_size_t qn;
  int cmp;
  mp_ptr tp;
  mp_limb_t cy = 4711;		/* silence warnings */
  TMP_DECL;

  qn = nn - dn;
  if (qn == 0)
    return;

  TMP_MARK;

  tp = TMP_ALLOC_LIMBS (nn + 1);

  /* Keep the larger operand first for mpn_mul. */
  if (dn >= qn)
    mpn_mul (tp, dp, dn, qp, qn);
  else
    mpn_mul (tp, qp, qn, dp, dn);

  if (rp != NULL)
    {
      /* Q*D + R must equal N exactly, including the top carry. */
      cy = mpn_add_n (tp + qn, tp + qn, rp, dn);
      cmp = cy != rh || mpn_cmp (tp, np, nn) != 0;
    }
  else
    /* No remainder supplied: only the low nn-dn limbs are checked. */
    cmp = mpn_cmp (tp, np, nn - dn) != 0;

  if (cmp != 0)
    {
      printf ("\r*******************************************************************************\n");
      printf ("%s inconsistent in test %lu\n", fname, test);
      printf ("N=    "); dumpy (np, nn);
      printf ("D=    "); dumpy (dp, dn);
      printf ("Q=    "); dumpy (qp, qn);
      if (rp != NULL)
	{
	  printf ("R=    "); dumpy (rp, dn);
	  printf ("Rb=   %d, Cy=%d\n", (int) cy, (int) rh);
	}
      printf ("T=    "); dumpy (tp, nn);
      printf ("nn = %ld, dn = %ld, qn = %ld", nn, dn, qn);
      printf ("\n*******************************************************************************\n");
      abort ();
    }

  TMP_FREE;
}
/* Accumulate {xp, xn} into y at limb offset `offset`:
   y += x * B^offset, where y is {yp, |*yn|} and *yn is updated to the
   (normalised) new length.  Handles every overlap configuration of
   the shifted x against the current extent of y.
   NOTE(review): signs carried in *yn/xn are treated as magnitudes
   here (ABS) -- sign handling is assumed done by the caller. */
void tc4_copy (mp_ptr yp, mp_size_t * yn, mp_size_t offset, mp_srcptr xp, mp_size_t xn)
{
   mp_size_t yu = ABS(*yn);
   mp_size_t xu = ABS(xn);
   mp_limb_t cy = 0;

   if (xn == 0)
      return;

   if (offset < yu) /* low part of x overlaps with y */
   {
      if (offset + xu <= yu) /* x entirely inside y */
      {
         cy = mpn_add_n (yp + offset, yp + offset, xp, xu);
         /* Ripple the carry through the remaining high limbs of y. */
         if (offset + xu < yu)
            cy = mpn_add_1 (yp + offset + xu, yp + offset + xu,
                            yu - (offset + xu), cy);
      } else
         cy = mpn_add_n (yp + offset, yp + offset, xp, yu - offset);

      /* now cy is the carry at yp + yu */
      if (xu + offset > yu) /* high part of x exceeds y */
      {
         /* Copy the non-overlapping tail of x and add the carry in. */
         MPN_COPY (yp + yu, xp + yu - offset, xu + offset - yu);
         cy = mpn_add_1 (yp + yu, yp + yu, xu + offset - yu, cy);
         yu = xu + offset;
      }

      /* now cy is the carry at yp + yn */
      if (cy)
         yp[yu++] = cy;

      MPN_NORMALIZE(yp, yu);
      *yn = yu;
   } else /* x does not overlap */
   {
      /* Zero-fill the gap between y's end and the shifted x. */
      if (offset > yu)
         MPN_ZERO (yp + yu, offset - yu);

      MPN_COPY (yp + offset, xp, xu);
      *yn = offset + xu;
   }
}
/* Square {ap, rn} modulo B^rn - 1, result to {rp, rn}, semi-normalised
   (zero may appear as 0 or as B^rn - 1).  Requires 2*rn scratch limbs
   at tp; tp == rp is permitted. */
static void
mpn_bc_sqrmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
{
  mp_limb_t carry;

  ASSERT (0 < rn);

  /* Full 2*rn-limb square first. */
  mpn_sqr (tp, ap, rn);

  /* Since B^rn == 1 (mod B^rn - 1), fold the high half onto the low
     half.  A set carry implies rp <= B^rn - 2, so absorbing it below
     cannot overflow again. */
  carry = mpn_add_n (rp, tp, tp + rn, rn);
  MPN_INCR_U (rp, rn, carry);
}
/* Compute {ip, n} = floor((B^(2n) - 1) / {dp, n}) - B^n, the exact
   n-limb inverse of the normalised divisor d.  Small sizes divide
   B^(2n) - 1 by d directly; large sizes use the approximate Newton
   inverse and correct an off-by-one if flagged.
   Note: TMP_MARK/TMP_FREE bracket no TMP_ALLOC in this variant --
   the scratch comes entirely from the caller-provided buffer. */
void
mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
{
  ASSERT (n > 0);
  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));

  if (n == 1)
    invert_limb (*ip, *dp);
  else {
    TMP_DECL;

    TMP_MARK;
    if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD))
      {
	/* Maximum scratch needed by this branch: 2*n */
	mp_size_t i;
	mp_ptr xp;

	xp = scratch;				/* 2 * n limbs */
	/* Build the dividend B^(2n) - 1 - d*B^n = (B^n - 1 - d)*B^n
	   + (B^n - 1): all-ones low half, ~d high half. */
	for (i = n - 1; i >= 0; i--)
	  xp[i] = GMP_NUMB_MAX;
	mpn_com (xp + n, dp, n);
	if (n == 2) {
	  mpn_divrem_2 (ip, 0, xp, 4, dp);
	} else {
	  gmp_pi1_t inv;
	  invert_pi1 (inv, dp[n-1], dp[n-2]);
	  /* FIXME: should we use dcpi1_div_q, for big sizes? */
	  mpn_sbpi1_div_q (ip, xp, 2 * n, dp, n, inv.inv32);
	}
      }
    else { /* Use approximated inverse; correct the result if needed. */
      mp_limb_t e; /* The possible error in the approximate inverse */

      ASSERT ( mpn_invert_itch (n) >= mpn_invertappr_itch (n) );
      e = mpn_ni_invertappr (ip, dp, n, scratch);

      if (UNLIKELY (e)) { /* Assume the error can only be "0" (no error) or "1". */
	/* Code to detect and correct the "off by one" approximation. */
	/* If (X+1)*d + d still fits below B^(2n), X was one short. */
	mpn_mul_n (scratch, ip, dp, n);
	ASSERT_NOCARRY (mpn_add_n (scratch + n, scratch + n, dp, n));
	if (! mpn_add (scratch, scratch, 2*n, dp, n))
	  MPN_INCR_U (ip, n, 1); /* The value was wrong, correct it.  */
      }
    }
    TMP_FREE;
  }
}
/* Compute {ip, n} = floor((B^(2n) - 1) / {dp, n}) - B^n, the exact
   n-limb inverse of the normalised divisor d.  Small sizes divide
   B^(2n) - 1 by d directly; large sizes use the approximate Newton
   inverse and correct an off-by-one when flagged. */
void
mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
{
  ASSERT (n > 0);
  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));

  if (n == 1)
    invert_limb (*ip, *dp);
  else if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD))
    {
	/* Maximum scratch needed by this branch: 2*n */
	mp_size_t i;
	mp_ptr xp;

	xp = scratch;				/* 2 * n limbs */
	/* n > 1 here */
	/* Build the dividend B^(2n) - 1 - d*B^n: all-ones low half,
	   ~d high half. */
	i = n;
	do
	  xp[--i] = GMP_NUMB_MAX;
	while (i);
	mpn_com (xp + n, dp, n);
	if (n == 2)
	  mpn_divrem_2 (ip, 0, xp, 4, dp);
	else
	  {
	    gmp_pi1_t inv;
	    invert_pi1 (inv, dp[n-1], dp[n-2]);
	    /* FIXME: should we use dcpi1_div_q, for big sizes? */
	    mpn_sbpi1_div_q (ip, xp, 2 * n, dp, n, inv.inv32);
	  }
    }
  else /* Use approximated inverse; correct the result if needed. */
    {
      mp_limb_t e; /* The possible error in the approximate inverse */

      ASSERT ( mpn_invert_itch (n) >= mpn_invertappr_itch (n) );
      e = mpn_ni_invertappr (ip, dp, n, scratch);

      if (UNLIKELY (e)) /* Assume the error can only be "0" (no error) or "1". */
	{
	  /* Code to detect and correct the "off by one" approximation. */
	  /* Recompute X*d + d and look at the final carry: a carry
	     means X is already correct, no carry means increment. */
	  mpn_mul_n (scratch, ip, dp, n);
	  e = mpn_add_n (scratch, scratch, dp, n); /* FIXME: we only need e.*/
	  if (LIKELY(e)) /* The high part can not give a carry by itself. */
	    e = mpn_add_nc (scratch + n, scratch + n, dp, n, e); /* FIXME:e */
	  /* If the value was wrong (no carry), correct it (increment). */
	  e ^= CNST_LIMB (1);
	  MPN_INCR_U (ip, n, e);
	}
    }
}