/* Perform one reduction step on the n-limb operands at ap and bp, with
   size bound s (n > s is required).

   The two most significant limbs of each operand (shifted so that the
   top bit of the leading limb pair is used, when that is possible) are
   handed to mpn_hgcd2.  If that succeeds, the resulting single-limb
   matrix M1 is multiplied into M and its inverse is applied to (a;b).
   Otherwise the step is delegated to mpn_gcd_subdiv_step.

   tp is scratch space.  The return value is whatever the helper that
   finished the step returns; callers in this file treat 0 as "no
   reduction was possible".  */
mp_size_t
mpn_hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
               struct hgcd_matrix *M, mp_ptr tp)
{
  struct hgcd_matrix1 M1;
  mp_limb_t top;
  mp_limb_t a_hi, a_lo, b_hi, b_lo;

  ASSERT (n > s);

  top = ap[n-1] | bp[n-1];
  ASSERT (top > 0);

  /* Only one limb above the bound and almost no bits in it: skip
     mpn_hgcd2 and go straight to the subtract/divide fallback.  */
  if (n == s + 1 && top < 4)
    return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_hook, M, tp);

  if (n == s + 1 || (top & GMP_NUMB_HIGHBIT) != 0)
    {
      /* Use the two leading limbs as they are.  */
      a_hi = ap[n-1];
      a_lo = ap[n-2];
      b_hi = bp[n-1];
      b_lo = bp[n-2];
    }
  else
    {
      /* Normalize: shift by the number of leading zeros of the combined
         top limb, pulling the missing bits from the third limb.  */
      int lz;

      count_leading_zeros (lz, top);
      a_hi = MPN_EXTRACT_NUMB (lz, ap[n-1], ap[n-2]);
      a_lo = MPN_EXTRACT_NUMB (lz, ap[n-2], ap[n-3]);
      b_hi = MPN_EXTRACT_NUMB (lz, bp[n-1], bp[n-2]);
      b_lo = MPN_EXTRACT_NUMB (lz, bp[n-2], bp[n-3]);
    }

  /* Try an mpn_hgcd2 step; fall back when it makes no progress.  */
  if (!mpn_hgcd2 (a_hi, a_lo, b_hi, b_lo, &M1))
    return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_hook, M, tp);

  /* M <- M * M1 */
  mpn_hgcd_matrix_mul_1 (M, &M1, tp);

  /* The inputs cannot be swapped, so a is copied to scratch first.  */
  MPN_COPY (tp, ap, n);

  /* (a;b) <- M1^{-1} (a;b) */
  return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);
}
/* Perform one reduction step on the n-limb operands at ap and bp while
   keeping both operands larger than s limbs, and record the step in M.

   First an mpn_hgcd2 step on the leading limbs is attempted.  If that
   fails, one subtraction followed by one division is performed
   instead.

   tp is scratch space; it receives a copy of a on the mpn_hgcd2 path
   and the quotient on the division path.

   Returns 0 when no step can be made without dropping an operand to s
   limbs or fewer.  Otherwise returns the value of
   mpn_hgcd_mul_matrix1_inverse_vector (mpn_hgcd2 path) or the limb
   count bn of the divisor operand (division path).

   NOTE: on the fallback path the local pointers ap and bp may be
   swapped; col records which column of M the updates apply to.  */
static mp_size_t
hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
	   struct hgcd_matrix *M, mp_ptr tp)
{
  struct hgcd_matrix1 M1;
  mp_limb_t mask;
  mp_limb_t ah, al, bh, bl;
  mp_size_t an, bn, qn;
  int col;

  ASSERT (n > s);

  /* Combined top limb: tells how many significant bits are available
     at the high end of the larger operand.  */
  mask = ap[n-1] | bp[n-1];
  ASSERT (mask > 0);

  if (n == s + 1)
    {
      /* Only one limb above the bound; with fewer than three bits in
	 it, skip mpn_hgcd2 altogether.  */
      if (mask < 4)
	goto subtract;

      ah = ap[n-1]; al = ap[n-2];
      bh = bp[n-1]; bl = bp[n-2];
    }
  else if (mask & GMP_NUMB_HIGHBIT)
    {
      /* Already normalized: use the two leading limbs as they are.  */
      ah = ap[n-1]; al = ap[n-2];
      bh = bp[n-1]; bl = bp[n-2];
    }
  else
    {
      /* Shift by the leading zero count of mask, pulling the missing
	 bits from the third limb.  */
      int shift;

      count_leading_zeros (shift, mask);
      ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
      al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
      bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
      bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
    }

  /* Try an mpn_hgcd2 step */
  if (mpn_hgcd2 (ah, al, bh, bl, &M1))
    {
      /* Multiply M <- M * M1 */
      hgcd_matrix_mul_1 (M, &M1, tp);

      /* Can't swap inputs, so we need to copy. */
      MPN_COPY (tp, ap, n);
      /* Multiply M1^{-1} (a;b) */
      return mpn_hgcd_mul_matrix1_inverse_vector (&M1, ap, tp, bp, n);
    }

 subtract:
  /* There are two ways in which mpn_hgcd2 can fail. Either one of ah and
     bh was too small, or ah, bh were (almost) equal. Perform one
     subtraction step (for possible cancellation of high limbs),
     followed by one division. */

  /* Since we must ensure that #(a-b) > s, we handle cancellation of
     high limbs explicitly up front. (FIXME: Or is it better to just
     subtract, normalize, and use an addition to undo if it turns out
     that the difference is too small?) */
  /* Find the most significant limb position (above s) where a and b
     differ; an becomes that position + 1.  */
  for (an = n; an > s; an--)
    if (ap[an-1] != bp[an-1])
      break;

  /* All limbs above s are equal, so a - b would fit in s limbs.  */
  if (an == s)
    return 0;

  /* Maintain a > b. When needed, swap a and b, and let col keep track
     of how to update M. */
  if (ap[an-1] > bp[an-1])
    {
      /* a is largest. In the subtraction step, we need to update
	 column 1 of M */
      col = 1;
    }
  else
    {
      MP_PTR_SWAP (ap, bp);
      col = 0;
    }

  /* Normalized size of the smaller operand; it must stay above s.  */
  bn = n;
  MPN_NORMALIZE (bp, bn);
  if (bn <= s)
    return 0;

  /* We have #a, #b > s. When is it possible that #(a-b) <= s? For
     cancellation to happen, the numbers must be of the form

       a = x + 1, 0,            ..., 0,            al
       b = x    , GMP_NUMB_MAX, ..., GMP_NUMB_MAX, bl

     where al, bl denotes the least significant k limbs. If al < bl,
     then #(a-b) <= k, and if also high(al) != 0 or high(bl) !=
     GMP_NUMB_MAX, then #(a-b) = k. If al >= bl, then #(a-b) = k + 1. */

  if (ap[an-1] == bp[an-1] + 1)
    {
      mp_size_t k;
      int c;

      /* Skip the run of (0, GMP_NUMB_MAX) limb pairs below the leading
	 differing limb, stopping at s.  */
      for (k = an-1; k > s; k--)
	if (ap[k-1] != 0 || bp[k-1] != GMP_NUMB_MAX)
	  break;

      MPN_CMP (c, ap, bp, k);
      if (c < 0)
	{
	  mp_limb_t cy;

	  /* The limbs from k and up are cancelled. */
	  if (k == s)
	    return 0;
	  /* The borrow out of the low k limbs is absorbed by the
	     cancelled high part.  */
	  cy = mpn_sub_n (ap, ap, bp, k);
	  ASSERT (cy == 1);
	  an = k;
	}
      else
	{
	  /* No borrow from the low k limbs; the difference of the high
	     parts leaves a single 1 at limb k.  */
	  ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, k));
	  ap[k] = 1;
	  an = k + 1;
	}
    }
  else
    /* Leading limbs differ by more than one: plain subtraction of the
       low an limbs (the limbs above an are equal and cancel).  */
    ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, an));

  ASSERT (an > s);
  ASSERT (ap[an-1] > 0);
  ASSERT (bn > s);
  ASSERT (bp[bn-1] > 0);

  /* Record the subtraction in M.  */
  hgcd_matrix_update_1 (M, col);

  /* Re-establish a >= b before dividing, flipping col on every swap.  */
  if (an < bn)
    {
      MPN_PTR_SWAP (ap, an, bp, bn);
      col ^= 1;
    }
  else if (an == bn)
    {
      int c;
      MPN_CMP (c, ap, bp, an);
      if (c < 0)
	{
	  MP_PTR_SWAP (ap, bp);
	  col ^= 1;
	}
    }

  /* Divide a / b. */
  qn = an + 1 - bn;

  /* FIXME: We could use an approximate division, that may return a
     too small quotient, and only guarantee that the size of r is
     almost the size of b. FIXME: Let ap and remainder overlap. */
  /* Quotient goes to tp, remainder overwrites a.  */
  mpn_tdiv_qr (tp, ap, 0, ap, an, bp, bn);
  /* Drop a zero top quotient limb.  */
  qn -= (tp[qn -1] == 0);

  /* Normalize remainder */
  an = bn;
  for ( ; an > s; an--)
    if (ap[an-1] > 0)
      break;

  if (an <= s)
    {
      /* Quotient is too large */
      /* The remainder fell to s limbs or fewer: add b back once and
	 decrement the quotient to compensate.  */
      mp_limb_t cy;

      cy = mpn_add (ap, bp, bn, ap, an);

      if (cy > 0)
	{
	  /* The sum grew by one limb; extend both operands so that
	     they keep a common size.  */
	  ASSERT (bn < n);
	  ap[bn] = cy;
	  bp[bn] = 0;
	  bn++;
	}

      MPN_DECR_U (tp, qn, 1);
      qn -= (tp[qn-1] == 0);
    }

  /* Record the (possibly adjusted) quotient in M; the space after the
     quotient in tp is passed on as scratch.  */
  if (qn > 0)
    hgcd_matrix_update_q (M, tp, qn, col, tp + qn);

  return bn;
}
/* Approximate HGCD on the n-limb operands at ap and bp.  The reduction
   steps are accumulated in M; tp is scratch space.

   Destroys inputs.

   Returns nonzero if at least one reduction step was recorded in M,
   and 0 otherwise (in particular for n <= 2).  */
int
mpn_hgcd_appr (mp_ptr ap, mp_ptr bp, mp_size_t n,
               struct hgcd_matrix *M, mp_ptr tp)
{
  mp_size_t s;
  mp_size_t next_n;
  unsigned extra_bits;
  int reduced = 0;

  ASSERT (n > 0);
  ASSERT ((ap[n-1] | bp[n-1]) != 0);

  /* n <= 2 implies s = n.  A fairly uninteresting case, but exercised
     by the random inputs of the testsuite.  */
  if (n <= 2)
    return 0;

  ASSERT ((n+1)/2 - 1 < M->alloc);

  /* We aim for a reduction to GMP_NUMB_BITS * s bits.  But each time we
     discard some of the least significant limbs, we must keep one
     additional bit to account for the truncation error.  We maintain
     GMP_NUMB_BITS * s - extra_bits as the current target size.  */
  s = n/2 + 1;

  if (!BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
    {
      /* Large operands: reduce, step down to about 3/4 of the size,
         then recurse on the high limbs.  */
      mp_size_t stop_size = (3*n)/4 + 1;

      next_n = mpn_hgcd_reduce (M, ap, bp, n, n/2, tp);
      if (next_n)
        {
          n = next_n;
          /* FIXME: Discard some of the low limbs immediately? */
          reduced = 1;
        }

      while (n > stop_size)
        {
          /* Needs n + 1 storage */
          next_n = mpn_hgcd_step (n, ap, bp, s, M, tp);
          if (!next_n)
            return reduced;

          n = next_n;
          reduced = 1;
        }

      if (n > s + 2)
        {
          struct hgcd_matrix M1;
          mp_size_t drop = 2*s - n + 1;
          mp_size_t scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-drop);

          /* M1 lives at the start of tp; the recursive call and the
             final multiplication use the space after it.  */
          mpn_hgcd_matrix_init (&M1, n - drop, tp);
          if (mpn_hgcd_appr (ap + drop, bp + drop, n - drop, &M1,
                             tp + scratch))
            {
              /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
              ASSERT (M->n + 2 >= M1.n);

              /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
                 then either q or q + 1 is a correct quotient, and M1
                 will start with either (1, 0; 1, 1) or (2, 1; 1, 1).
                 This rules out the case that the size of M * M1 is much
                 smaller than the expected M->n + M1->n.  */
              ASSERT (M->n + M1.n < M->alloc);

              /* We need a bound for M->n + M1.n.  Let n be the original
                 input size.  Then

                   ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2

                 and it follows that

                   M.n + M1.n <= ceil(n/2) + 1

                 Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
                 amount of needed scratch space.  */
              mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
              return 1;
            }
        }

      /* The recursion was skipped or made no progress: keep stepping
         until a step fails.  */
      for (;;)
        {
          ASSERT (n > s);
          ASSERT (n <= 2*s);

          next_n = mpn_hgcd_step (n, ap, bp, s, M, tp);
          if (!next_n)
            return reduced;

          n = next_n;
          reduced = 1;
        }
    }

  /* Small operands: plain stepping, truncating low limbs as we go.  */
  extra_bits = 0;

  while (n > 2)
    {
      mp_size_t drop;

      ASSERT (n > s);
      ASSERT (n <= 2*s);

      next_n = mpn_hgcd_step (n, ap, bp, s, M, tp);
      if (!next_n)
        break;

      n = next_n;
      reduced = 1;

      /* We can truncate and discard the lower p bits whenever
         nbits <= 2*sbits - p.  To account for the truncation error, we
         must adjust

           sbits <-- sbits + 1 - p,

         rather than just sbits <-- sbits - p.  This adjustment makes
         the produced matrix slightly smaller than it could be.  */
      if (GMP_NUMB_BITS * (n + 1) + 2 * extra_bits > 2*GMP_NUMB_BITS * s)
        continue;

      /* Number of whole limbs to drop, computed before s and
         extra_bits are adjusted below.  */
      drop = (GMP_NUMB_BITS * (2*s - n) - 2*extra_bits) / GMP_NUMB_BITS;

      if (extra_bits > 0)
        {
          extra_bits--;
        }
      else
        {
          /* We cross a limb boundary and bump s.  We can't do that if
             the result is that it makes min(U, V) smaller than
             2^{GMP_NUMB_BITS} s.  */
          if (s + 1 == n
              || mpn_zero_p (ap + s + 1, n - s - 1)
              || mpn_zero_p (bp + s + 1, n - s - 1))
            continue;

          extra_bits = GMP_NUMB_BITS - 1;
          s++;
        }

      /* Drop the least significant limbs */
      ap += drop;
      bp += drop;
      n -= drop;
      s -= drop;
    }

  ASSERT (s > 0);

  if (extra_bits > 0)
    {
      /* We can get here only if we have dropped at least one of the
         least significant limbs, so we can decrement ap and bp.  We
         can then shift left by extra_bits using mpn_rshift with a
         count of GMP_NUMB_BITS - extra_bits; its return value is
         stored in the newly exposed low limb.  */
      /* NOTE: In the unlikely case that n is large, it would be
         preferable to do an initial subdiv step to reduce the size
         before shifting, but that would mean duplicating
         mpn_gcd_subdiv_step with a bit count rather than a limb
         count.  */
      ap--;
      bp--;
      ap[0] = mpn_rshift (ap+1, ap+1, n, GMP_NUMB_BITS - extra_bits);
      bp[0] = mpn_rshift (bp+1, bp+1, n, GMP_NUMB_BITS - extra_bits);

      /* Count the extra limb unless the top limbs of both operands
         became zero.  */
      if ((ap[n] | bp[n]) > 0)
        n++;

      ASSERT (reduced);

      while (n > 2)
        {
          ASSERT (n > s);
          ASSERT (n <= 2*s);

          next_n = mpn_hgcd_step (n, ap, bp, s, M, tp);
          if (!next_n)
            return 1;

          n = next_n;
        }
    }

  if (n == 2)
    {
      struct hgcd_matrix1 M1;

      ASSERT (s == 1);

      /* Two limbs left: one last mpn_hgcd2 on the complete operands.  */
      if (mpn_hgcd2 (ap[1], ap[0], bp[1], bp[0], &M1))
        {
          /* Multiply M <- M * M1 */
          mpn_hgcd_matrix_mul_1 (M, &M1, tp);
          reduced = 1;
        }
    }

  return reduced;
}