/* Toom-2 (Karatsuba-style) squaring step: fills {pp, 2*an} from {ap, an}.

   The operand is split as a = a0 + a1*B^n (B being the limb base), with a0
   the low n limbs and a1 the high s limbs, where s = an >> 1 and n = an - s.
   Three half-size results are produced through TOOM2_SQR_REC and then
   recombined in place:

     v0   = a0^2          2n limbs, stored at pp
     vinf = a1^2          2s limbs, stored at pp + 2n
     vm1  = (a0 - a1)^2   2n limbs, stored at scratch

   so that a^2 = v0 + (v0 + vinf - vm1)*B^n + vinf*B^(2n).

   NOTE(review): TOOM2_SQR_REC is defined outside this block; the description
   above assumes it writes the square of its operand -- confirm at its
   definition.

   The first 2n limbs of scratch hold vm1; the area starting at scratch + 2n
   is passed on to the TOOM2_SQR_REC calls as their workspace.  */
void
mpn_toom2_sqr (mp_ptr pp,
               mp_srcptr ap, mp_size_t an,
               mp_ptr scratch)
{
  /* Not referenced by name anywhere in this function.  NOTE(review):
     presumably picked up by macros expanded below, whose definitions are not
     part of this block -- confirm before touching it.  */
  const int __gmpn_cpuvec_initialized = 1;
  mp_size_t s = an >> 1;        /* limbs in the high piece a1 */
  mp_size_t n = an - s;         /* limbs in the low piece a0 */
  mp_ptr asm1 = pp;             /* |a0 - a1|, n limbs, parked in the product area */
  mp_limb_t cy, cy2;
  mp_limb_t bw;

#define a0 ap
#define a1 (ap + n)

  ASSERT (0 < s && s <= n && s >= n - 1);

  /* Build asm1 = |a0 - a1|.  Only the magnitude matters because the value is
     squared afterwards, so the smaller piece is always subtracted from the
     larger one.  */
  if (s == n)
    {
      /* Both pieces have the same length.  */
      if (mpn_cmp (a0, a1, n) >= 0)
        mpn_sub_n (asm1, a0, a1, n);
      else
        mpn_sub_n (asm1, a1, a0, n);
    }
  else if (a0[s] != 0 || mpn_cmp (a0, a1, s) >= 0)
    {
      /* a0 has one limb more than a1 and a0 >= a1: subtract the common s
         limbs, then take the borrow out of the extra top limb of a0.  */
      bw = mpn_sub_n (asm1, a0, a1, s);
      asm1[s] = a0[s] - bw;
    }
  else
    {
      /* The extra limb of a0 is zero and its low s limbs are below a1, so
         a1 - a0 fits in s limbs; the top limb of asm1 is cleared.  */
      mpn_sub_n (asm1, a1, a0, s);
      asm1[s] = 0;
    }

#define v0 pp                   /* 2n limbs */
#define vinf (pp + 2 * n)       /* s+s limbs */
#define vm1 scratch             /* 2n limbs */
#define scratch_out scratch + 2 * n

  /* vm1 must be produced first: asm1 lives at pp, which the v0 computation
     below overwrites.  */
  TOOM2_SQR_REC (vm1, asm1, n, scratch_out);    /* vm1, 2n limbs */
  TOOM2_SQR_REC (vinf, a1, s, scratch_out);     /* vinf, s+s limbs */
  TOOM2_SQR_REC (v0, ap, n, scratch_out);       /* v0, 2n limbs */

  /* Interpolation, done in place on pp.  L() and H() denote the low and high
     halves of a value.  */

  /* pp[2n..3n) = H(v0) + L(vinf) */
  cy = mpn_add_n (vinf, v0 + n, vinf, n);

  /* pp[n..2n) = L(v0) + H(v0) + L(vinf); cy2 collects what still has to be
     carried into pp + 2n.  */
  cy2 = cy + mpn_add_n (v0 + n, vinf, v0, n);

  /* pp[2n..3n) += H(vinf); cy now counts the carries due at pp + 3n.  */
  cy += mpn_add (vinf, vinf, n, vinf + n, s + s - n);

  /* pp[n..3n) -= vm1; a borrow here is charged against cy.  */
  cy -= mpn_sub_n (v0 + n, v0 + n, vm1, 2 * n);

  ASSERT (cy + 1 <= 3);
  ASSERT (cy2 <= 2);

  /* Propagate the pending carries.  When cy is outside 0..2 the borrow
     exceeded the carries (NOTE(review): presumably cy wrapped around as an
     unsigned limb -- confirm mp_limb_t is unsigned), and a single decrement
     is applied instead of an increment.  */
  MPN_INCR_U (pp + 2 * n, s + s, cy2);
  if (LIKELY (cy <= 2))
    {
      MPN_INCR_U (pp + 3 * n, s + s - n, cy);
    }
  else
    {
      MPN_DECR_U (pp + 3 * n, s + s - n, 1);
    }
}
/* Toom-2 (Karatsuba-style) squaring step: fills {pp, 2*an} from {ap, an}.

   The operand is split as a = a0 + a1*B^n (B being the limb base), with a0
   the low n limbs and a1 the high s limbs, where s = an >> 1 and n = an - s.
   Three half-size results are produced through TOOM2_SQR_REC and then
   recombined in place:

     v0   = a0^2          2n limbs, stored at pp
     vinf = a1^2          2s limbs, stored at pp + 2n
     vm1  = (a0 - a1)^2   2n limbs, stored at scratch

   so that a^2 = v0 + (v0 + vinf - vm1)*B^n + vinf*B^(2n).

   NOTE(review): TOOM2_SQR_REC is defined outside this block; the description
   above assumes it writes the square of its operand -- confirm at its
   definition.

   The first 2n limbs of scratch hold vm1; the area starting at scratch + 2n
   is passed on to the TOOM2_SQR_REC calls as their workspace.  */
void
mpn_toom2_sqr (mp_ptr pp,
               mp_srcptr ap, mp_size_t an,
               mp_ptr scratch)
{
  mp_size_t s = an >> 1;        /* limbs in the high piece a1 */
  mp_size_t n = an - s;         /* limbs in the low piece a0 */
  mp_ptr asm1 = pp;             /* |a0 - a1|, n limbs, parked in the product area */
  mp_limb_t cy, cy2;

#define a0 ap
#define a1 (ap + n)

  ASSERT (0 < s && s <= n);

  /* Build asm1 = |a0 - a1|.  Only the magnitude matters because the value is
     squared afterwards, so the smaller piece is always subtracted from the
     larger one.  */
  if (s == n)
    {
      /* Both pieces have the same length.  */
      if (mpn_cmp (a0, a1, n) >= 0)
        mpn_sub_n (asm1, a0, a1, n);
      else
        mpn_sub_n (asm1, a1, a0, n);
    }
  else if (! mpn_zero_p (a0 + s, n - s) || mpn_cmp (a0, a1, s) >= 0)
    {
      /* a0 is longer than a1 and a0 >= a1: a mixed-length subtraction of the
         s limbs of a1 from the n limbs of a0.  */
      mpn_sub (asm1, a0, n, a1, s);
    }
  else
    {
      /* The limbs of a0 above position s are all zero and its low s limbs
         are below a1, so a1 - a0 fits in s limbs; the remaining n - s limbs
         of asm1 are cleared.  */
      mpn_sub_n (asm1, a1, a0, s);
      MPN_ZERO (asm1 + s, n - s);
    }

#define v0 pp                   /* 2n limbs */
#define vinf (pp + 2 * n)       /* s+s limbs */
#define vm1 scratch             /* 2n limbs */
#define scratch_out scratch + 2 * n

  /* vm1 must be produced first: asm1 lives at pp, which the v0 computation
     below overwrites.  */
  TOOM2_SQR_REC (vm1, asm1, n, scratch_out);    /* vm1, 2n limbs */
  TOOM2_SQR_REC (vinf, a1, s, scratch_out);     /* vinf, s+s limbs */
  TOOM2_SQR_REC (v0, ap, n, scratch_out);       /* v0, 2n limbs */

  /* Interpolation, done in place on pp.  L() and H() denote the low and high
     halves of a value.  */

  /* pp[2n..3n) = H(v0) + L(vinf) */
  cy = mpn_add_n (vinf, v0 + n, vinf, n);

  /* pp[n..2n) = L(v0) + H(v0) + L(vinf); cy2 collects what still has to be
     carried into pp + 2n.  */
  cy2 = cy + mpn_add_n (v0 + n, vinf, v0, n);

  /* pp[2n..3n) += H(vinf); cy now counts the carries due at pp + 3n.  */
  cy += mpn_add (vinf, vinf, n, vinf + n, s + s - n);

  /* pp[n..3n) -= vm1; a borrow here is charged against cy.  */
  cy -= mpn_sub_n (v0 + n, v0 + n, vm1, 2 * n);

  ASSERT (cy + 1 <= 3);
  ASSERT (cy2 <= 2);

  /* Propagate the pending carries.  When cy is outside 0..2 the borrow
     exceeded the carries (NOTE(review): presumably cy wrapped around as an
     unsigned limb -- confirm mp_limb_t is unsigned), and a single decrement
     is applied instead of an increment.  */
  mpn_incr_u (pp + 2 * n, cy2);
  if (LIKELY (cy <= 2))
    {
      mpn_incr_u (pp + 3 * n, cy);
    }
  else
    {
      mpn_decr_u (pp + 3 * n, 1);
    }
}