mp_size_t mpn_hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s, struct hgcd_matrix *M, mp_ptr tp) { struct hgcd_matrix1 M1; mp_limb_t mask; mp_limb_t ah, al, bh, bl; ASSERT (n > s); mask = ap[n-1] | bp[n-1]; ASSERT (mask > 0); if (n == s + 1) { if (mask < 4) goto subtract; ah = ap[n-1]; al = ap[n-2]; bh = bp[n-1]; bl = bp[n-2]; } else if (mask & GMP_NUMB_HIGHBIT) { ah = ap[n-1]; al = ap[n-2]; bh = bp[n-1]; bl = bp[n-2]; } else { int shift; count_leading_zeros (shift, mask); ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]); al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]); bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]); bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]); } /* Try an mpn_hgcd2 step */ if (mpn_hgcd2 (ah, al, bh, bl, &M1)) { /* Multiply M <- M * M1 */ mpn_hgcd_matrix_mul_1 (M, &M1, tp); /* Can't swap inputs, so we need to copy. */ MPN_COPY (tp, ap, n); /* Multiply M1^{-1} (a;b) */ return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n); } subtract: return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_hook, M, tp); }
static mp_size_t hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s, struct hgcd_matrix *M, mp_ptr tp) { struct hgcd_matrix1 M1; mp_limb_t mask; mp_limb_t ah, al, bh, bl; mp_size_t an, bn, qn; int col; ASSERT (n > s); mask = ap[n-1] | bp[n-1]; ASSERT (mask > 0); if (n == s + 1) { if (mask < 4) goto subtract; ah = ap[n-1]; al = ap[n-2]; bh = bp[n-1]; bl = bp[n-2]; } else if (mask & GMP_NUMB_HIGHBIT) { ah = ap[n-1]; al = ap[n-2]; bh = bp[n-1]; bl = bp[n-2]; } else { int shift; count_leading_zeros (shift, mask); ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]); al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]); bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]); bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]); } /* Try an mpn_hgcd2 step */ if (mpn_hgcd2 (ah, al, bh, bl, &M1)) { /* Multiply M <- M * M1 */ hgcd_matrix_mul_1 (M, &M1, tp); /* Can't swap inputs, so we need to copy. */ MPN_COPY (tp, ap, n); /* Multiply M1^{-1} (a;b) */ return mpn_hgcd_mul_matrix1_inverse_vector (&M1, ap, tp, bp, n); } subtract: /* There are two ways in which mpn_hgcd2 can fail. Either one of ah and bh was too small, or ah, bh were (almost) equal. Perform one subtraction step (for possible cancellation of high limbs), followed by one division. */ /* Since we must ensure that #(a-b) > s, we handle cancellation of high limbs explicitly up front. (FIXME: Or is it better to just subtract, normalize, and use an addition to undo if it turns out the the difference is too small?) */ for (an = n; an > s; an--) if (ap[an-1] != bp[an-1]) break; if (an == s) return 0; /* Maintain a > b. When needed, swap a and b, and let col keep track of how to update M. */ if (ap[an-1] > bp[an-1]) { /* a is largest. In the subtraction step, we need to update column 1 of M */ col = 1; } else { MP_PTR_SWAP (ap, bp); col = 0; } bn = n; MPN_NORMALIZE (bp, bn); if (bn <= s) return 0; /* We have #a, #b > s. When is it possible that #(a-b) < s? For cancellation to happen, the numbers must be of the form a = x + 1, 0, ..., 0, al b = x , GMP_NUMB_MAX, ..., GMP_NUMB_MAX, bl where al, bl denotes the least significant k limbs. If al < bl, then #(a-b) < k, and if also high(al) != 0, high(bl) != GMP_NUMB_MAX, then #(a-b) = k. If al >= bl, then #(a-b) = k + 1. */ if (ap[an-1] == bp[an-1] + 1) { mp_size_t k; int c; for (k = an-1; k > s; k--) if (ap[k-1] != 0 || bp[k-1] != GMP_NUMB_MAX) break; MPN_CMP (c, ap, bp, k); if (c < 0) { mp_limb_t cy; /* The limbs from k and up are cancelled. */ if (k == s) return 0; cy = mpn_sub_n (ap, ap, bp, k); ASSERT (cy == 1); an = k; } else { ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, k)); ap[k] = 1; an = k + 1; } } else ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, an)); ASSERT (an > s); ASSERT (ap[an-1] > 0); ASSERT (bn > s); ASSERT (bp[bn-1] > 0); hgcd_matrix_update_1 (M, col); if (an < bn) { MPN_PTR_SWAP (ap, an, bp, bn); col ^= 1; } else if (an == bn) { int c; MPN_CMP (c, ap, bp, an); if (c < 0) { MP_PTR_SWAP (ap, bp); col ^= 1; } } /* Divide a / b. */ qn = an + 1 - bn; /* FIXME: We could use an approximate division, that may return a too small quotient, and only guarantee that the size of r is almost the size of b. FIXME: Let ap and remainder overlap. */ mpn_tdiv_qr (tp, ap, 0, ap, an, bp, bn); qn -= (tp[qn -1] == 0); /* Normalize remainder */ an = bn; for ( ; an > s; an--) if (ap[an-1] > 0) break; if (an <= s) { /* Quotient is too large */ mp_limb_t cy; cy = mpn_add (ap, bp, bn, ap, an); if (cy > 0) { ASSERT (bn < n); ap[bn] = cy; bp[bn] = 0; bn++; } MPN_DECR_U (tp, qn, 1); qn -= (tp[qn-1] == 0); } if (qn > 0) hgcd_matrix_update_q (M, tp, qn, col, tp + qn); return bn; }
int mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits) { mp_size_t scratch; mp_size_t matrix_scratch; mp_ptr tp; TMP_DECL; ASSERT (n > 0); ASSERT ( (ap[n-1] | bp[n-1]) > 0); ASSERT ( (bp[0] | ap[0]) & 1); /* FIXME: Check for small sizes first, before setting up temporary storage etc. */ scratch = MPN_GCD_SUBDIV_STEP_ITCH(n); if (ABOVE_THRESHOLD (n, GCD_DC_THRESHOLD)) { mp_size_t hgcd_scratch; mp_size_t update_scratch; mp_size_t p = CHOOSE_P (n); mp_size_t dc_scratch; matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); hgcd_scratch = mpn_hgcd_itch (n - p); update_scratch = p + n - 1; dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch); if (dc_scratch > scratch) scratch = dc_scratch; } TMP_MARK; tp = TMP_ALLOC_LIMBS(scratch); while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD)) { struct hgcd_matrix M; mp_size_t p = 2*n/3; mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p); mp_size_t nn; mpn_hgcd_matrix_init (&M, n - p, tp); nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits, tp + matrix_scratch); if (nn > 0) { ASSERT (M.n <= (n - p - 1)/2); ASSERT (M.n + p <= (p + n - 1) / 2); /* Temporary storage 2 (p + M->n) <= p + n - 1. */ n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch); } else { /* Temporary storage n */ n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp); if (!n) { TMP_FREE; return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits); } } } while (n > 2) { struct hgcd_matrix1 M; mp_limb_t ah, al, bh, bl; mp_limb_t mask; mask = ap[n-1] | bp[n-1]; ASSERT (mask > 0); if (mask & GMP_NUMB_HIGHBIT) { ah = ap[n-1]; al = ap[n-2]; bh = bp[n-1]; bl = bp[n-2]; } else { int shift; count_leading_zeros (shift, mask); ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]); al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]); bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]); bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]); } /* Try an mpn_nhgcd2 step */ if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits)) { n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n); MP_PTR_SWAP (ap, tp); } else { /* mpn_hgcd2 has failed. Then either one of a or b is very small, or the difference is very small. Perform one subtraction followed by one division. */ n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp); if (!n) { TMP_FREE; return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits); } } } if (bits >= 16) MP_PTR_SWAP (ap, bp); ASSERT (bp[0] & 1); if (n == 1) { mp_limb_t al, bl; al = ap[0]; bl = bp[0]; TMP_FREE; if (bl == 1) return 1 - 2*(bits & 1); else return mpn_jacobi_base (al, bl, bits << 1); } else { int res = mpn_jacobi_2 (ap, bp, bits & 1); TMP_FREE; return res; } }