/* Store in {ip,n} the n-limb inverse of the normalized divisor {dp,n}.

   Preconditions (checked by the ASSERTs below): n > 0, the top bit of
   dp[n-1] is set, and ip, dp and scratch do not overlap each other.

   Three strategies are used, chosen by n:
     - n == 1: a single invert_limb.
     - n below INV_APPR_THRESHOLD: an explicit division.  The 2n-limb
       numerator has all-ones low limbs and the complement of {dp,n} as its
       high limbs; this uses 2*n limbs of scratch.
     - otherwise: mpn_ni_invertappr, followed by a check-and-increment when
       it reports a possible error.  */
void
mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
{
  ASSERT (n > 0);
  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));

  /* Single-limb divisor: nothing more to do after invert_limb.  */
  if (n == 1)
    {
      invert_limb (*ip, *dp);
      return;
    }

  if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD))
    {
      /* Division-based branch; needs at most 2*n limbs of scratch.  */
      mp_ptr np = scratch;	/* numerator, 2 * n limbs */
      mp_size_t k;

      /* Low half of the numerator: every limb set to GMP_NUMB_MAX.  */
      for (k = 0; k < n; k++)
	np[k] = GMP_NUMB_MAX;

      /* High half of the numerator: limb-wise complement of the divisor.  */
      mpn_com (np + n, dp, n);

      if (n == 2)
	mpn_divrem_2 (ip, 0, np, 4, dp);
      else
	{
	  gmp_pi1_t pi1;

	  invert_pi1 (pi1, dp[n-1], dp[n-2]);
	  /* FIXME: should we use dcpi1_div_q, for big sizes? */
	  mpn_sbpi1_div_q (ip, np, 2 * n, dp, n, pi1.inv32);
	}
    }
  else
    {
      /* Approximate inverse first; repair it afterwards if necessary.  */
      mp_limb_t err;	/* nonzero when the approximation may be off */

      ASSERT ( mpn_invert_itch (n) >= mpn_invertappr_itch (n) );
      err = mpn_ni_invertappr (ip, dp, n, scratch);

      if (UNLIKELY (err))
	{
	  /* The error is assumed to be either 0 or 1.  To tell which,
	     form ip*dp in scratch and add dp to both halves, watching
	     the carry out of the top.  */
	  mp_limb_t cy;

	  mpn_mul_n (scratch, ip, dp, n);
	  cy = mpn_add_n (scratch, scratch, dp, n);	/* FIXME: we only need the carry. */
	  if (LIKELY (cy))
	    /* The high part can not give a carry by itself, so it is only
	       added when the low part carried.  */
	    cy = mpn_add_nc (scratch + n, scratch + n, dp, n, cy);	/* FIXME: carry only */

	  /* No carry means the value was one too small: the increment is
	     1 in that case and 0 otherwise.  */
	  cy ^= CNST_LIMB (1);
	  MPN_INCR_U (ip, n, cy);
	}
    }
}
/* Store in {ip,n} the n-limb inverse of the normalized divisor {dp,n}.

   Preconditions (checked by the ASSERTs below): n > 0, the top bit of
   dp[n-1] is set, and ip, dp and scratch do not overlap each other.

   All working storage comes from the caller-supplied scratch area; this
   function performs no temporary allocation of its own, so no TMP marker
   is declared.

   Three strategies are used, chosen by n:
     - n == 1: a single invert_limb.
     - n below INV_APPR_THRESHOLD: an explicit division.  The 2n-limb
       numerator has all-ones low limbs and the complement of {dp,n} as its
       high limbs; this uses 2*n limbs of scratch.
     - otherwise: mpn_ni_invertappr, followed by a check-and-increment when
       it reports a possible error.  */
void
mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
{
  ASSERT (n > 0);
  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));

  if (n == 1)
    invert_limb (*ip, *dp);
  else if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD))
    {
      /* Maximum scratch needed by this branch: 2*n */
      mp_size_t i;
      mp_ptr xp;

      xp = scratch;				/* 2 * n limbs */

      /* Low half of the numerator: every limb set to GMP_NUMB_MAX.  */
      for (i = n - 1; i >= 0; i--)
	xp[i] = GMP_NUMB_MAX;

      /* High half of the numerator: limb-wise complement of the divisor.  */
      mpn_com (xp + n, dp, n);

      if (n == 2)
	{
	  mpn_divrem_2 (ip, 0, xp, 4, dp);
	}
      else
	{
	  gmp_pi1_t inv;

	  invert_pi1 (inv, dp[n-1], dp[n-2]);
	  /* FIXME: should we use dcpi1_div_q, for big sizes? */
	  mpn_sbpi1_div_q (ip, xp, 2 * n, dp, n, inv.inv32);
	}
    }
  else
    {
      /* Use approximated inverse; correct the result if needed. */
      mp_limb_t e; /* The possible error in the approximate inverse */

      ASSERT ( mpn_invert_itch (n) >= mpn_invertappr_itch (n) );
      e = mpn_ni_invertappr (ip, dp, n, scratch);

      if (UNLIKELY (e))
	{
	  /* Assume the error can only be "0" (no error) or "1". */
	  /* Code to detect and correct the "off by one" approximation:
	     form ip*dp in the 2n-limb scratch, add dp to the high half
	     (asserted not to carry), then add dp to the whole 2n-limb
	     value.  If that last addition produces no carry, ip is
	     incremented.  */
	  mpn_mul_n (scratch, ip, dp, n);
	  ASSERT_NOCARRY (mpn_add_n (scratch + n, scratch + n, dp, n));
	  if (! mpn_add (scratch, scratch, 2*n, dp, n))
	    MPN_INCR_U (ip, n, 1); /* The value was wrong, correct it.  */
	}
    }
}
int main (int argc, char **argv) { mp_ptr ap, bp, rp, refp; mp_size_t max_n, n; gmp_randstate_ptr rands; long test, reps = 1000; TMP_SDECL; TMP_SMARK; tests_start (); TESTS_REPS (reps, argv, argc); rands = RANDS; max_n = 32; ap = TMP_SALLOC_LIMBS (max_n); bp = TMP_SALLOC_LIMBS (max_n); rp = TMP_SALLOC_LIMBS (max_n); refp = TMP_SALLOC_LIMBS (max_n); for (test = 0; test < reps; test++) { for (n = 1; n <= max_n; n++) { mpn_random2 (ap, n); mpn_random2 (bp, n); refmpn_and_n (refp, ap, bp, n); mpn_and_n (rp, ap, bp, n); check_one (refp, rp, ap, bp, n, "and_n"); refmpn_ior_n (refp, ap, bp, n); mpn_ior_n (rp, ap, bp, n); check_one (refp, rp, ap, bp, n, "ior_n"); refmpn_xor_n (refp, ap, bp, n); mpn_xor_n (rp, ap, bp, n); check_one (refp, rp, ap, bp, n, "xor_n"); refmpn_andn_n (refp, ap, bp, n); mpn_andn_n (rp, ap, bp, n); check_one (refp, rp, ap, bp, n, "andn_n"); refmpn_iorn_n (refp, ap, bp, n); mpn_iorn_n (rp, ap, bp, n); check_one (refp, rp, ap, bp, n, "iorn_n"); refmpn_nand_n (refp, ap, bp, n); mpn_nand_n (rp, ap, bp, n); check_one (refp, rp, ap, bp, n, "nand_n"); refmpn_nior_n (refp, ap, bp, n); mpn_nior_n (rp, ap, bp, n); check_one (refp, rp, ap, bp, n, "nior_n"); refmpn_xnor_n (refp, ap, bp, n); mpn_xnor_n (rp, ap, bp, n); check_one (refp, rp, ap, bp, n, "xnor_n"); refmpn_com (refp, ap, n); mpn_com (rp, ap, n); check_one (refp, rp, ap, bp, n, "com"); } } TMP_SFREE; tests_end (); return 0; }
/* Set w to a remainder of u modulo 2**cnt, with the rounding direction
   selected by dir.

   Only the sign of dir relative to the sign of u matters here.  When the
   two signs differ the quotient rounds towards zero, so the remainder is
   just the low cnt bits of |u| with u's sign.  When they agree the
   quotient rounds away from zero, so a non-zero remainder is
   2**cnt - (low bits of |u|) with the opposite sign.
   NOTE(review): presumably dir is +1 for ceiling and -1 for floor --
   confirm against the callers.

   w and u may be the same object.  */
REGPARM_ATTR (1) static void
cfdiv_r_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt, int dir)
{
  mp_size_t  usize, abs_usize, limb_cnt, ncopy, k;
  mp_srcptr  up;
  mp_ptr     wp;
  mp_limb_t  top;
  int        need_negate;

  usize = SIZ(u);
  if (usize == 0)
    {
      SIZ(w) = 0;
      return;
    }

  /* Split cnt into whole limbs (limb_cnt) and leftover bits (cnt).  */
  limb_cnt = cnt / GMP_NUMB_BITS;
  cnt %= GMP_NUMB_BITS;
  abs_usize = ABS (usize);

  /* In the truncating case w is reallocated only when w != u, so the
     pointer fetched here stays valid there.  The negating case re-fetches
     it after its own realloc.  */
  up = PTR(u);

  if ((usize ^ dir) < 0)
    {
      /* Rounding towards zero: plain truncation.  */
      if (w != u)
	{
	  ncopy = MIN (abs_usize, limb_cnt+1);
	  wp = MPZ_REALLOC (w, ncopy);
	  MPN_COPY (wp, up, ncopy);

	  /* u has no limbs at or above limb_cnt: the copy is the answer.  */
	  if (abs_usize <= limb_cnt)
	    {
	      SIZ(w) = usize;
	      return;
	    }
	}
      else
	{
	  /* In place, and u already has no limbs at or above limb_cnt:
	     leave it untouched.  */
	  if (abs_usize <= limb_cnt)
	    return;
	  wp = PTR(w);
	}
    }
  else
    {
      /* Rounding away from zero: twos complement when the low part of u
	 is non-zero.  */

      /* u != 0 and entirely below the divisor: must negate.  */
      need_negate = (abs_usize <= limb_cnt);

      /* Otherwise any non-zero full low limb forces negation ...  */
      for (k = 0; ! need_negate && k < limb_cnt; k++)
	need_negate = (up[k] != 0);

      /* ... as does a non-zero partial limb.  */
      if (! need_negate)
	need_negate = ((up[limb_cnt] & LOW_MASK (cnt)) != 0);

      /* All low bits of u are zero, so the remainder is zero.  */
      if (! need_negate)
	{
	  SIZ(w) = 0;
	  return;
	}

      /* Twos complement negation to get 2**cnt-u.  */
      wp = MPZ_REALLOC (w, limb_cnt+1);
      up = PTR(u);	/* re-fetch: the realloc may have moved it if w == u */

      /* Ones complement of the limbs u has, then all-ones above them.  */
      ncopy = MIN (abs_usize, limb_cnt+1);
      mpn_com (wp, up, ncopy);
      for (k = ncopy; k <= limb_cnt; k++)
	wp[k] = GMP_NUMB_MAX;

      /* Add one to finish the twos complement.  Since u!=0 in the relevant
	 part, this never gives 0 and a carry, so MPN_INCR_U can be used.  */
      MPN_INCR_U (wp, limb_cnt+1, CNST_LIMB(1));

      usize = -usize;
    }

  /* Keep only the low cnt bits of the top limb.  */
  top = wp[limb_cnt] & LOW_MASK (cnt);
  wp[limb_cnt] = top;

  /* Strip high zero limbs; if nothing is left the result is zero.  */
  while (top == 0)
    {
      if (--limb_cnt < 0)
	{
	  SIZ(w) = 0;
	  return;
	}
      top = wp[limb_cnt];
    }

  /* limb_cnt indexes the highest non-zero limb; the size is one more,
     carrying the sign held in usize.  */
  SIZ(w) = (usize >= 0 ? limb_cnt + 1 : -(limb_cnt + 1));
}