/* Interpolation and recomposition step of a 12-point Toom multiplication.

   The routine first combines the evaluated values r0..r5 (plus the low
   2n limbs already stored at pp, labelled r6 in the diagram further down)
   in place, then adds r5, r3 and r1 into pp at limb offsets n, 5n and 9n.

   Arguments, as they are used by the code below:
     pp    product area.  It hosts r4 at pp + 3n, r2 at pp + 7n and r0 at
           pp + 11n; its low 2n limbs are read as a further operand.
     r1, r3, r5
           values handled as 3n+1 limbs each.
     n     block size in limbs.
     spt   number of limbs of r0 that are used (s+t <= 2*n according to
           the r0 comment below).
     half  when non-zero, r0 is first removed from the other values and the
           whole of r1 is recomposed; when zero, only spt limbs of r1 + n
           are added at pp + 10n.
     wsi   scratch area handed to the DO_mpn_* helpers.  In the builds
           without a native mpn_add_n_sub_n it also receives 3n+1 limbs
           and is then swapped with r1 (and later with r5), so those
           pointers do not necessarily refer to the caller's buffers at
           the end of the routine.

   NOTE(review): DO_mpn_sublsh_n, DO_mpn_addlsh_n and DO_mpn_subrsh are
   defined outside this block; judging by their names they presumably
   subtract/add a left-shifted operand and subtract a right-shifted operand
   respectively - confirm against their definitions.  */
void
mpn_toom_interpolate_12pts (mp_ptr pp, mp_ptr r1, mp_ptr r3, mp_ptr r5,
                        mp_size_t n, mp_size_t spt, int half, mp_ptr wsi)
{
  mp_limb_t cy;
  mp_size_t n3;
  mp_size_t n3p1;
  n3 = 3 * n;
  n3p1 = n3 + 1;

  /* Values that live inside the product area itself.  */
#define   r4    (pp + n3)                       /* 3n+1 */
#define   r2    (pp + 7 * n)                    /* 3n+1 */
#define   r0    (pp +11 * n)                    /* s+t <= 2*n */

  /******************************* interpolation *****************************/
  /* With half set, take the r0 contribution (spt limbs) out of every other
     value: r3 directly, r2 and r1 with shift counts 10 and 20, r5 and r4
     through DO_mpn_subrsh with shift counts 2 and 4.  Borrows from the
     spt-limb operations are propagated into the remaining high limbs.  */
  if (half != 0) {
    cy = mpn_sub_n (r3, r3, r0, spt);
    MPN_DECR_U (r3 + spt, n3p1 - spt, cy);

    cy = DO_mpn_sublsh_n (r2, r0, spt, 10, wsi);
    MPN_DECR_U (r2 + spt, n3p1 - spt, cy);
    DO_mpn_subrsh(r5, n3p1, r0, spt, 2, wsi);

    cy = DO_mpn_sublsh_n (r1, r0, spt, 20, wsi);
    MPN_DECR_U (r1 + spt, n3p1 - spt, cy);
    DO_mpn_subrsh(r4, n3p1, r0, spt, 4, wsi);
  };

  /* Same kind of correction using the low 2n limbs of pp, applied to r4
     and r1 one block (n limbs) up.  */
  r4[n3] -= DO_mpn_sublsh_n (r4 + n, pp, 2 * n, 20, wsi);
  DO_mpn_subrsh(r1 + n, 2 * n + 1, pp, 2 * n, 4, wsi);

  /* r1 <- r1 + r4 and r4 <- r4 - r1 (old r1).  The fallback builds the sum
     in wsi and then exchanges the r1 and wsi pointers.  */
#if HAVE_NATIVE_mpn_add_n_sub_n
  mpn_add_n_sub_n (r1, r4, r4, r1, n3p1);
#else
  ASSERT_NOCARRY(mpn_add_n (wsi, r1, r4, n3p1));
  mpn_sub_n (r4, r4, r1, n3p1); /* can be negative */
  MP_PTR_SWAP(r1, wsi);
#endif

  /* Correction by the low 2n limbs of pp for r5 and r2.  */
  r5[n3] -= DO_mpn_sublsh_n (r5 + n, pp, 2 * n, 10, wsi);
  DO_mpn_subrsh(r2 + n, 2 * n + 1, pp, 2 * n, 2, wsi);

  /* r2 <- r2 + r5 and r5 <- r5 - r2 (old r2).  The fallback builds the
     difference in wsi and then exchanges the r5 and wsi pointers.  */
#if HAVE_NATIVE_mpn_add_n_sub_n
  mpn_add_n_sub_n (r2, r5, r5, r2, n3p1);
#else
  mpn_sub_n (wsi, r5, r2, n3p1); /* can be negative */
  ASSERT_NOCARRY(mpn_add_n (r2, r2, r5, n3p1));
  MP_PTR_SWAP(r5, wsi);
#endif

  /* Subtract the low 2n limbs of pp from r3, one block up; the borrow goes
     to the top limb.  */
  r3[n3] -= mpn_sub_n (r3+n, r3+n, pp, 2 * n);

  /* r4 <- r4 - 257 * r5 (257 = 1 + 2^8 in the shift-based variant).  */
#if AORSMUL_FASTER_AORS_AORSLSH
  mpn_submul_1 (r4, r5, n3p1, 257); /* can be negative */
#else
  mpn_sub_n (r4, r4, r5, n3p1); /* can be negative */
  DO_mpn_sublsh_n (r4, r5, n3p1, 8, wsi); /* can be negative */
#endif
  /* A division by 2835x4 follows. Warning: the operand can be negative! */
  mpn_divexact_by2835x4(r4, r4, n3p1);
  /* If any of the three top bits of the high limb is set, force the two
     top bits to one.  NOTE(review): presumably this restores the sign
     bits of a negative value after the division - confirm against
     mpn_divexact_by2835x4.  */
  if ((r4[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
    r4[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));

  /* r5 <- r5 + 60 * r4 (60 = 2^6 - 2^2 in the shift-based variant), then
     divide by 255.  */
#if AORSMUL_FASTER_2AORSLSH
  mpn_addmul_1 (r5, r4, n3p1, 60); /* can be negative */
#else
  DO_mpn_sublsh_n (r5, r4, n3p1, 2, wsi); /* can be negative */
  DO_mpn_addlsh_n (r5, r4, n3p1, 6, wsi); /* can give a carry */
#endif
  mpn_divexact_by255(r5, r5, n3p1);

  /* r2 is reduced by r3 with shift count 5.  */
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r3, n3p1, 5, wsi));

  /* r1 <- r1 - 100 * r2 (100 = 2^6 + 2^5 + 2^2 in the shift-based
     variant), then r1 is reduced by r3 with shift count 9 and divided by
     42525.  */
#if AORSMUL_FASTER_3AORSLSH
  ASSERT_NOCARRY(mpn_submul_1 (r1, r2, n3p1, 100));
#else
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 6, wsi));
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 5, wsi));
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r2, n3p1, 2, wsi));
#endif
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r1, r3, n3p1, 9, wsi));
  mpn_divexact_by42525(r1, r1, n3p1);

  /* r2 <- r2 - 225 * r1 (225 = 1 - 2^5 + 2^8 in the shift-based variant),
     then divide by 9x4.  */
#if AORSMUL_FASTER_AORS_2AORSLSH
  ASSERT_NOCARRY(mpn_submul_1 (r2, r1, n3p1, 225));
#else
  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r1, n3p1));
  ASSERT_NOCARRY(DO_mpn_addlsh_n (r2, r1, n3p1, 5, wsi));
  ASSERT_NOCARRY(DO_mpn_sublsh_n (r2, r1, n3p1, 8, wsi));
#endif
  mpn_divexact_by9x4(r2, r2, n3p1);

  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r2, n3p1));

  /* r4 <- (r2 - r4) / 2, then r2 <- r2 - r4.  */
  mpn_sub_n (r4, r2, r4, n3p1);
  ASSERT_NOCARRY(mpn_rshift(r4, r4, n3p1, 1));
  ASSERT_NOCARRY(mpn_sub_n (r2, r2, r4, n3p1));

  /* r5 <- (r5 + r1) / 2.  */
  mpn_add_n (r5, r5, r1, n3p1);
  ASSERT_NOCARRY(mpn_rshift(r5, r5, n3p1, 1));

  /* last interpolation steps... */
  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r1, n3p1));
  ASSERT_NOCARRY(mpn_sub_n (r1, r1, r5, n3p1));
  /* ... could be mixed with recomposition
        ||H-r5|M-r5|L-r5|   ||H-r1|M-r1|L-r1|
  */

  /***************************** recomposition *******************************/
  /*
    pp[] prior to operations:
    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp

    summation scheme for remaining operations:
    |__12|n_11|n_10|n__9|n__8|n__7|n__6|n__5|n__4|n__3|n__2|n___|n___|pp
    |M r0|L r0|___||H r2|M r2|L r2|___||H r4|M r4|L r4|____|H_r6|L r6|pp
        ||H r1|M r1|L r1|   ||H r3|M r3|L r3|   ||H_r5|M_r5|L_r5|
  */

  /* Add r5 at pp + n: the low block is added to existing limbs, the middle
     block is stored together with the incoming carry, the high block is
     added to the low block of r4, and the remaining carry plus r5's top
     limb is propagated further up.  */
  cy = mpn_add_n (pp + n, pp + n, r5, n);
  cy = mpn_add_1 (pp + 2 * n, r5 + n, n, cy);
#if HAVE_NATIVE_mpn_add_nc
  cy = r5[n3] + mpn_add_nc(pp + n3, pp + n3, r5 + 2 * n, n, cy);
#else
  MPN_INCR_U (r5 + 2 * n, n + 1, cy);
  cy = r5[n3] + mpn_add_n (pp + n3, pp + n3, r5 + 2 * n, n);
#endif
  MPN_INCR_U (pp + n3 + n, 2 * n + 1, cy);

  /* Add r3 at pp + 5n following the same pattern; pp[6n] (the top limb of
     r4) collects the first carry and is then consumed as the addend of
     mpn_add_1.  */
  pp[2 * n3]+= mpn_add_n (pp + 5 * n, pp + 5 * n, r3, n);
  cy = mpn_add_1 (pp + 2 * n3, r3 + n, n, pp[2 * n3]);
#if HAVE_NATIVE_mpn_add_nc
  cy = r3[n3] + mpn_add_nc(pp + 7 * n, pp + 7 * n, r3 + 2 * n, n, cy);
#else
  MPN_INCR_U (r3 + 2 * n, n + 1, cy);
  cy = r3[n3] + mpn_add_n (pp + 7 * n, pp + 7 * n, r3 + 2 * n, n);
#endif
  MPN_INCR_U (pp + 8 * n, 2 * n + 1, cy);

  /* Add r1 at pp + 9n.  With half set the whole of r1 is added, limited at
     the top by spt; otherwise only spt limbs of its middle part are
     stored at pp + 10n and no carry out is expected.  */
  pp[10*n]+=mpn_add_n (pp + 9 * n, pp + 9 * n, r1, n);
  if (half) {
    cy = mpn_add_1 (pp + 10 * n, r1 + n, n, pp[10 * n]);
#if HAVE_NATIVE_mpn_add_nc
    if (LIKELY (spt > n)) {
      cy = r1[n3] + mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, n, cy);
      MPN_INCR_U (pp + 4 * n3, spt - n, cy);
    } else {
      ASSERT_NOCARRY(mpn_add_nc(pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt, cy));
    }
#else
    MPN_INCR_U (r1 + 2 * n, n + 1, cy);
    if (LIKELY (spt > n)) {
      cy = r1[n3] + mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, n);
      MPN_INCR_U (pp + 4 * n3, spt - n, cy);
    } else {
      ASSERT_NOCARRY(mpn_add_n (pp + 11 * n, pp + 11 * n, r1 + 2 * n, spt));
    }
#endif
  } else {
    ASSERT_NOCARRY(mpn_add_1 (pp + 10 * n, r1 + n, spt, pp[10 * n]));
  }

#undef   r0
#undef   r2
#undef   r4
}
/* Interpolation and recomposition step of an 8-point Toom multiplication.

   The evaluated values r1, r3, r5, r7 (plus the low 2n limbs already stored
   at pp, labelled r8 in the diagram further down) are combined in place and
   the results are then summed into pp.

   Arguments, as they are used by the code below:
     pp    product area.  It hosts r5 at pp + 3n (3n+1 limbs) and r1 at
           pp + 7n (spt limbs); its low 2n limbs are read as a further
           operand.
     n     block size in limbs.
     r3, r7
           values handled as 3n+1 limbs each; both are modified.
     spt   number of limbs of r1.  The final carry is propagated into
           spt - n limbs at pp + 8n, or asserted to be zero when spt == n.
     ws    scratch area handed to the DO_mpn_* helpers.

   NOTE(review): DO_mpn_sublsh_n, DO_mpn_sublsh2_n and DO_mpn_subrsh are
   defined outside this block; judging by their names they presumably
   subtract a left-shifted (resp. right-shifted) operand - confirm against
   their definitions.  */
void
mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n,
                           mp_ptr r3, mp_ptr r7,
                           mp_size_t spt, mp_ptr ws)
{
  mp_limb_signed_t cy;   /* signed: it also holds carry-minus-borrow values */
  mp_ptr r5, r1;
  r5 = (pp + 3 * n);                    /* 3n+1 */
  r1 = (pp + 7 * n);                    /* spt */

  /******************************* interpolation *****************************/

  /* Remove the contributions of the low 2n limbs of pp and of r1 from r3,
     r5 and r7; borrows from the spt-limb operations are propagated into
     the remaining high limbs.  */
  DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws);
  cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws);
  MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy);

  DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws);
  cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws);
  MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy);

  r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n);
  cy = mpn_sub_n (r7, r7, r1, spt);
  MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy);

  /* r3 <- (r3 - r5) / 4 */
  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));
  ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2));

  /* r5 <- r5 - r7 */
  ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1));

  /* r3 <- (r3 - r5) / 45 */
  ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1));

  mpn_divexact_by45 (r3, r3, 3 * n + 1);

  /* r5 <- r5 / 3, then reduced by r3 through DO_mpn_sublsh2_n */
  ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));

  ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));

  /* last interpolation steps... */
  /* ... are mixed with recomposition */

  /***************************** recomposition *******************************/
  /*
    pp[] prior to operations:
     |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp

    summation scheme for remaining operations:
     |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp
     |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp
          ||_H r3|_M r3|_L*r3|
                                  ||_H_r7|_M_r7|_L_r7|
                      ||-H r3|-M r3|-L*r3|
                                  ||-H*r5|-M_r5|-L_r5|
  */

  cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */
  cy-= mpn_sub_n (pp + n, pp + n, r5, n);
  /* cy is now carry minus borrow (-1, 0 or 1); push it into the rest of r7. */
  if (0 > cy)
    MPN_DECR_U (r7 + n, 2*n + 1, 1);
  else
    MPN_INCR_U (r7 + n, 2*n + 1, cy);

  cy = mpn_sub_n (pp + 2*n, r7 + n, r5 + n, n); /* Mr7-Mr5 */
  MPN_DECR_U (r7 + 2*n, n + 1, cy);

  cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */
  r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */
  cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */
  /* Again carry minus borrow, propagated into the upper part of r5. */
  if (UNLIKELY(0 > cy))
    MPN_DECR_U (r5 + n + 1, 2*n, 1);
  else
    MPN_INCR_U (r5 + n + 1, 2*n, cy);

  ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */

  /* pp[6n] holds the top limb written by the subtraction just above; it is
     folded into the middle block of r3 while that block is stored at
     pp + 6n.  */
  cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
  MPN_INCR_U (r3 + 2*n, n + 1, cy);
  cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
  if (LIKELY(spt != n))
    MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]);
  else
    /* No limbs are left above pp + 8n, so both the carry and the top limb
       of r3 must be zero.  The parentheses are required: `==' binds
       tighter than `|', and without them the check would degenerate to
       r3[3*n] | (cy == 0).  */
    ASSERT ((r3[3*n] | cy) == 0);
}