/* Write to {ip,n} the inverse of the normalised operand {dp,n}.

   ip       destination, n limbs; must not overlap dp or scratch.
   dp       operand, n limbs; its top limb must have the high bit set.
   n        limb count, n > 0.
   scratch  work area; the basecase path below uses its first 2*n limbs.

   Three strategies, selected by n:
     n == 1                  -> invert_limb;
     n < INV_APPR_THRESHOLD  -> schoolbook division of a 2n-limb numerator;
     otherwise               -> mpn_ni_invertappr plus a correction step for
                                a possible off-by-one result.  */
void
mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
{
  ASSERT (n > 0);
  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));

  /* One limb: the single-limb inverter does the whole job.  */
  if (n == 1)
    {
      invert_limb (*ip, *dp);
      return;
    }

  if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD))
    {
      /* Basecase.  Maximum scratch needed by this branch: 2*n limbs.  */
      mp_ptr num = scratch;
      mp_size_t k;

      /* Numerator: low n limbs all ones, high n limbs the output of
	 mpn_com applied to dp.  n > 1 here.  */
      for (k = 0; k < n; k++)
	num[k] = GMP_NUMB_MAX;
      mpn_com (num + n, dp, n);

      if (n == 2)
	{
	  mpn_divrem_2 (ip, 0, num, 4, dp);
	}
      else
	{
	  gmp_pi1_t pi;

	  invert_pi1 (pi, dp[n-1], dp[n-2]);
	  /* FIXME: should we use dcpi1_div_q, for big sizes? */
	  mpn_sbpi1_div_q (ip, num, 2 * n, dp, n, pi.inv32);
	}
      return;
    }

  /* Large operands: use the approximated inverse, then correct the result
     if needed.  */
  {
    mp_limb_t cy;		/* first the reported error, then a carry */

    ASSERT ( mpn_invert_itch (n) >= mpn_invertappr_itch (n) );
    cy = mpn_ni_invertappr (ip, dp, n, scratch);

    /* Assume the error can only be "0" (no error) or "1".  */
    if (UNLIKELY (cy))
      {
	/* Detect and correct the "off by one" approximation: form ip * dp
	   in scratch, add dp to the low half and, when that carries, add
	   dp (plus the carry) to the high half.  */
	mpn_mul_n (scratch, ip, dp, n);
	cy = mpn_add_n (scratch, scratch, dp, n); /* FIXME: we only need cy. */
	if (LIKELY (cy))
	  /* The high part can not give a carry by itself.  */
	  cy = mpn_add_nc (scratch + n, scratch + n, dp, n, cy); /* FIXME: cy */

	/* No final carry means the value was wrong: flip the carry into an
	   increment of 0 or 1 and apply it.  */
	cy ^= CNST_LIMB (1);
	MPN_INCR_U (ip, n, cy);
      }
  }
}
/* Write to {ip,n} the inverse of the normalised operand {dp,n}.

   ip       destination, n limbs; must not overlap dp or scratch.
   dp       operand, n limbs; its top limb must have the high bit set.
   n        limb count, n > 0.
   scratch  caller-supplied work area.  The basecase path uses its first
	    2*n limbs; the correction step of the approximation path stores
	    a 2*n-limb product there.

   No temporary memory is allocated by this function itself; all work space
   comes from SCRATCH.  */
void
mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
{
  ASSERT (n > 0);
  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));

  if (n == 1)
    invert_limb (*ip, *dp);
  else if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD))
    {
      /* Maximum scratch needed by this branch: 2*n */
      mp_size_t i;
      mp_ptr xp;

      xp = scratch;				/* 2 * n limbs */

      /* Build the 2n-limb numerator: low half all ones, high half the
	 output of mpn_com applied to dp.  */
      for (i = n - 1; i >= 0; i--)
	xp[i] = GMP_NUMB_MAX;
      mpn_com (xp + n, dp, n);

      if (n == 2)
	{
	  mpn_divrem_2 (ip, 0, xp, 4, dp);
	}
      else
	{
	  gmp_pi1_t inv;

	  invert_pi1 (inv, dp[n-1], dp[n-2]);
	  /* FIXME: should we use dcpi1_div_q, for big sizes? */
	  mpn_sbpi1_div_q (ip, xp, 2 * n, dp, n, inv.inv32);
	}
    }
  else
    {
      /* Use approximated inverse; correct the result if needed. */
      mp_limb_t e; /* The possible error in the approximate inverse */

      ASSERT ( mpn_invert_itch (n) >= mpn_invertappr_itch (n) );
      e = mpn_ni_invertappr (ip, dp, n, scratch);

      /* Assume the error can only be "0" (no error) or "1". */
      if (UNLIKELY (e))
	{
	  /* Code to detect and correct the "off by one" approximation:
	     compute ip * dp, add dp to its high half (asserted not to
	     carry), then add dp to the full 2n-limb value.  If that last
	     addition produces no carry the value was wrong.  */
	  mpn_mul_n (scratch, ip, dp, n);
	  ASSERT_NOCARRY (mpn_add_n (scratch + n, scratch + n, dp, n));
	  if (! mpn_add (scratch, scratch, 2*n, dp, n))
	    MPN_INCR_U (ip, n, 1); /* The value was wrong, correct it.  */
	}
    }
}
/* Write to {ip,n} the low n limbs of the quotient obtained by dividing a
   2n-limb numerator by {dp,n} with mpn_tdiv_qr.

   The numerator has its low n limbs set to all ones and its high n limbs
   set to the output of mpn_com_n applied to dp.

   scratch may be NULL, in which case mpn_invert_itch (n) limbs of temporary
   storage are allocated here and released before returning.  Otherwise the
   caller's buffer is used: its first 2*n limbs hold the numerator and the
   limbs after that receive the quotient.  */
void
mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
{
  mp_ptr num, quot;
  mp_size_t k;
  TMP_DECL;

  TMP_MARK;

  if (scratch == NULL)
    scratch = TMP_ALLOC_LIMBS (mpn_invert_itch (n));

  num = scratch;		/* 2 * n limbs */
  quot = num + 2 * n;		/* n + 2 limbs */

  /* Low half of the numerator: every bit set.  */
  for (k = 0; k < n; k++)
    num[k] = ~CNST_LIMB(0);

  /* High half of the numerator.  */
  mpn_com_n (num + n, dp, n);

  /* ip serves as the remainder area during the division; it is then
     overwritten with the low n quotient limbs.  */
  mpn_tdiv_qr (quot, ip, 0L, num, 2 * n, dp, n);
  MPN_COPY (ip, quot, n);

  TMP_FREE;
}
int main (int argc, char **argv) { mp_ptr ip, dp, scratch; int count = COUNT; int test; gmp_randstate_ptr rands; TMP_DECL; TMP_MARK; if (argc > 1) { char *end; count = strtol (argv[1], &end, 0); if (*end || count <= 0) { fprintf (stderr, "Invalid test count: %s.\n", argv[1]); return 1; } } tests_start (); rands = RANDS; dp = TMP_ALLOC_LIMBS (MAX_N); ip = 1+TMP_ALLOC_LIMBS (MAX_N + 2); scratch = 1+TMP_ALLOC_LIMBS (mpn_invert_itch (MAX_N) + 2); for (test = 0; test < count; test++) { unsigned size_min; unsigned size_range; mp_size_t n; mp_size_t itch; mp_limb_t i_before, i_after, s_before, s_after; for (size_min = 1; (1L << size_min) < MIN_N; size_min++) ; /* We generate an in the MIN_N <= n <= (1 << size_range). */ size_range = size_min + gmp_urandomm_ui (rands, SIZE_LOG + 1 - size_min); n = MIN_N + gmp_urandomm_ui (rands, (1L << size_range) + 1 - MIN_N); mpn_random2 (dp, n); mpn_random2 (ip-1, n + 2); i_before = ip[-1]; i_after = ip[n]; itch = mpn_invert_itch (n); ASSERT_ALWAYS (itch <= mpn_invert_itch (MAX_N)); mpn_random2 (scratch-1, itch+2); s_before = scratch[-1]; s_after = scratch[itch]; dp[n-1] |= GMP_NUMB_HIGHBIT; mpn_invert (ip, dp, n, scratch); if (ip[-1] != i_before || ip[n] != i_after || scratch[-1] != s_before || scratch[itch] != s_after || ! invert_valid(ip, dp, n)) { printf ("ERROR in test %d, n = %d\n", test, (int) n); if (ip[-1] != i_before) { printf ("before ip:"); mpn_dump (ip -1, 1); printf ("keep: "); mpn_dump (&i_before, 1); } if (ip[n] != i_after) { printf ("after ip:"); mpn_dump (ip + n, 1); printf ("keep: "); mpn_dump (&i_after, 1); } if (scratch[-1] != s_before) { printf ("before scratch:"); mpn_dump (scratch-1, 1); printf ("keep: "); mpn_dump (&s_before, 1); } if (scratch[itch] != s_after) { printf ("after scratch:"); mpn_dump (scratch + itch, 1); printf ("keep: "); mpn_dump (&s_after, 1); } mpn_dump (dp, n); mpn_dump (ip, n); abort(); } } TMP_FREE; tests_end (); return 0; }