/* mpz_mul -- set W = U * V.
   Handles all sign combinations, any aliasing (w == u, w == v, u == v),
   and grows W's limb allocation as needed.  */
void mpz_mul (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)
{
  mp_size_t usize;
  mp_size_t vsize;
  mp_size_t wsize;
  mp_size_t sign_product;   /* negative iff exactly one operand is negative */
  mp_ptr up, vp;
  mp_ptr wp;
  mp_ptr free_me;           /* old W block to free after the product is formed */
  size_t free_me_size;
  mp_limb_t cy_limb;
  TMP_DECL;

  usize = SIZ (u);
  vsize = SIZ (v);
  /* XOR of the signed sizes has the sign of the product.  */
  sign_product = usize ^ vsize;
  usize = ABS (usize);
  vsize = ABS (vsize);

  /* Keep U as the larger operand, as required below by mpn_mul.  */
  if (usize < vsize)
    {
      MPZ_SRCPTR_SWAP (u, v);
      MP_SIZE_T_SWAP (usize, vsize);
    }

  /* Anything times zero is zero.  */
  if (vsize == 0)
    {
      SIZ (w) = 0;
      return;
    }

  /* Fast paths for a 1-limb (or, with native mpn_mul_2, 2-limb) V.
     MPZ_REALLOC may move W even when it aliases U or V, which is why the
     realloc is done before reading PTR(u)/PTR(v) is unsafe in general --
     here it is safe because mpn_mul_1/mpn_mul_2 allow rp == up.  */
#if HAVE_NATIVE_mpn_mul_2
  if (vsize <= 2)
    {
      wp = MPZ_REALLOC (w, usize+vsize);
      if (vsize == 1)
        cy_limb = mpn_mul_1 (wp, PTR (u), usize, PTR (v)[0]);
      else
        {
          cy_limb = mpn_mul_2 (wp, PTR (u), usize, PTR (v));
          usize++;                      /* mpn_mul_2 wrote usize+1 limbs */
        }
      /* Append the carry limb, counting it only when non-zero.  */
      wp[usize] = cy_limb;
      usize += (cy_limb != 0);
      SIZ (w) = (sign_product >= 0 ? usize : -usize);
      return;
    }
#else
  if (vsize == 1)
    {
      wp = MPZ_REALLOC (w, usize+1);
      cy_limb = mpn_mul_1 (wp, PTR (u), usize, PTR (v)[0]);
      wp[usize] = cy_limb;
      usize += (cy_limb != 0);
      SIZ (w) = (sign_product >= 0 ? usize : -usize);
      return;
    }
#endif

  /* General case: full mpn_mul/mpn_sqr with explicit aliasing handling.  */
  TMP_MARK;
  free_me = NULL;
  up = PTR (u);
  vp = PTR (v);
  wp = PTR (w);

  /* Ensure W has space enough to store the result. */
  wsize = usize + vsize;
  if (ALLOC (w) < wsize)
    {
      if (wp == up || wp == vp)
        {
          /* W aliases an operand: keep the old block alive until after the
             multiply, since up/vp still point into it.  */
          free_me = wp;
          free_me_size = ALLOC (w);
        }
      else
        (*__gmp_free_func) (wp, (size_t) ALLOC (w) * GMP_LIMB_BYTES);

      ALLOC (w) = wsize;
      wp = __GMP_ALLOCATE_FUNC_LIMBS (wsize);
      PTR (w) = wp;
    }
  else
    {
      /* Make U and V not overlap with W. */
      if (wp == up)
        {
          /* W and U are identical. Allocate temporary space for U. */
          up = TMP_ALLOC_LIMBS (usize);
          /* Is V identical too? Keep it identical with U. */
          if (wp == vp)
            vp = up;
          /* Copy to the temporary space. */
          MPN_COPY (up, wp, usize);
        }
      else if (wp == vp)
        {
          /* W and V are identical. Allocate temporary space for V. */
          vp = TMP_ALLOC_LIMBS (vsize);
          /* Copy to the temporary space. */
          MPN_COPY (vp, wp, vsize);
        }
    }

  /* up == vp covers both u == v and the aliasing case above, so take the
     faster squaring path.  mpn_sqr doesn't return the carry, read it from
     the top limb instead.  */
  if (up == vp)
    {
      mpn_sqr (wp, up, usize);
      cy_limb = wp[wsize - 1];
    }
  else
    {
      cy_limb = mpn_mul (wp, up, usize, vp, vsize);
    }

  /* The product is either usize+vsize limbs or one less.  */
  wsize -= cy_limb == 0;

  SIZ (w) = sign_product < 0 ? -wsize : wsize;

  if (free_me != NULL)
    (*__gmp_free_func) (free_me, free_me_size * GMP_LIMB_BYTES);
  TMP_FREE;
}
/* mpn_mul_basecase -- schoolbook multiply: {rp, un+vn} = {up, un} * {vp, vn}.
   Requires un >= vn >= 1 and rp not overlapping either source.
   Uses native mpn_mul_2 / mpn_addmul_{2,3,4} when available; the MAX_LEFT
   macro tracks, per configuration, an upper bound on how many limbs of vp
   can remain when each loop is entered, letting later loops terminate after
   a single pass.  */
void mpn_mul_basecase (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
{
  ASSERT (un >= vn);
  ASSERT (vn >= 1);
  ASSERT (! MPN_OVERLAP_P (rp, un+vn, up, un));
  ASSERT (! MPN_OVERLAP_P (rp, un+vn, vp, vn));

  /* We first multiply by the low order limb (or depending on optional
     function availability, limbs).  This result can be stored, not added,
     to rp.  We also avoid a loop for zeroing this way. */
#if HAVE_NATIVE_mpn_mul_2
  if (vn >= 2)
    {
      rp[un + 1] = mpn_mul_2 (rp, up, un, vp);
      rp += 2, vp += 2, vn -= 2;
    }
  else
    {
      /* vn == 1: single mul_1 produces the whole product.  */
      rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
      return;
    }
#else
  rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
  rp += 1, vp += 1, vn -= 1;
#endif

  /* Now accumulate the product of up[] and the next low-order limb (or
     depending on optional function availability, limbs) from vp[0]. */
#define MAX_LEFT MP_SIZE_T_MAX

#if HAVE_NATIVE_mpn_addmul_4
  while (vn >= 4)
    {
      rp[un + 4 - 1] = mpn_addmul_4 (rp, up, un, vp);
      rp += 4, vp += 4, vn -= 4;
    }
  /* After this loop at most 3 limbs of vp remain.  */
#undef MAX_LEFT
#define MAX_LEFT 3
#endif

#if HAVE_NATIVE_mpn_addmul_3
  while (vn >= 3)
    {
      rp[un + 3 - 1] = mpn_addmul_3 (rp, up, un, vp);
      rp += 3, vp += 3, vn -= 3;
      /* If at most 3 limbs could have been left, one pass suffices; the
         condition is compile-time constant, so the loop folds to an if.  */
      if (MAX_LEFT - 3 <= 3)
        break;
    }
#undef MAX_LEFT
#define MAX_LEFT 2
#endif

#if HAVE_NATIVE_mpn_addmul_2
  while (vn >= 2)
    {
      rp[un + 2 - 1] = mpn_addmul_2 (rp, up, un, vp);
      rp += 2, vp += 2, vn -= 2;
      if (MAX_LEFT - 2 <= 2)
        break;
    }
#undef MAX_LEFT
#define MAX_LEFT 1
#endif

  /* Fallback: accumulate one limb of vp at a time.  */
  while (vn >= 1)
    {
      rp[un] = mpn_addmul_1 (rp, up, un, vp[0]);
      rp += 1, vp += 1, vn -= 1;
      if (MAX_LEFT - 1 <= 1)
        break;
    }
}
void check (void) { mp_limb_t wp[100], xp[100], yp[100]; mp_size_t size = 100; refmpn_zero (xp, size); refmpn_zero (yp, size); refmpn_zero (wp, size); pre ("mpn_add_n"); mpn_add_n (wp, xp, yp, size); post (); #if HAVE_NATIVE_mpn_add_nc pre ("mpn_add_nc"); mpn_add_nc (wp, xp, yp, size, CNST_LIMB(0)); post (); #endif #if HAVE_NATIVE_mpn_addlsh1_n pre ("mpn_addlsh1_n"); mpn_addlsh1_n (wp, xp, yp, size); post (); #endif #if HAVE_NATIVE_mpn_and_n pre ("mpn_and_n"); mpn_and_n (wp, xp, yp, size); post (); #endif #if HAVE_NATIVE_mpn_andn_n pre ("mpn_andn_n"); mpn_andn_n (wp, xp, yp, size); post (); #endif pre ("mpn_addmul_1"); mpn_addmul_1 (wp, xp, size, yp[0]); post (); #if HAVE_NATIVE_mpn_addmul_1c pre ("mpn_addmul_1c"); mpn_addmul_1c (wp, xp, size, yp[0], CNST_LIMB(0)); post (); #endif #if HAVE_NATIVE_mpn_com_n pre ("mpn_com_n"); mpn_com_n (wp, xp, size); post (); #endif #if HAVE_NATIVE_mpn_copyd pre ("mpn_copyd"); mpn_copyd (wp, xp, size); post (); #endif #if HAVE_NATIVE_mpn_copyi pre ("mpn_copyi"); mpn_copyi (wp, xp, size); post (); #endif pre ("mpn_divexact_1"); mpn_divexact_1 (wp, xp, size, CNST_LIMB(123)); post (); pre ("mpn_divexact_by3c"); mpn_divexact_by3c (wp, xp, size, CNST_LIMB(0)); post (); pre ("mpn_divrem_1"); mpn_divrem_1 (wp, (mp_size_t) 0, xp, size, CNST_LIMB(123)); post (); #if HAVE_NATIVE_mpn_divrem_1c pre ("mpn_divrem_1c"); mpn_divrem_1c (wp, (mp_size_t) 0, xp, size, CNST_LIMB(123), CNST_LIMB(122)); post (); #endif pre ("mpn_gcd_1"); xp[0] |= 1; notdead += (unsigned long) mpn_gcd_1 (xp, size, CNST_LIMB(123)); post (); #if HAVE_NATIVE_mpn_gcd_finda pre ("mpn_gcd_finda"); xp[0] |= 1; xp[1] |= 1; notdead += mpn_gcd_finda (xp); post (); #endif pre ("mpn_hamdist"); notdead += mpn_hamdist (xp, yp, size); post (); #if HAVE_NATIVE_mpn_ior_n pre ("mpn_ior_n"); mpn_ior_n (wp, xp, yp, size); post (); #endif #if HAVE_NATIVE_mpn_iorn_n pre ("mpn_iorn_n"); mpn_iorn_n (wp, xp, yp, size); post (); #endif pre ("mpn_lshift"); mpn_lshift (wp, xp, size, 1); post (); pre 
("mpn_mod_1"); notdead += mpn_mod_1 (xp, size, CNST_LIMB(123)); post (); #if HAVE_NATIVE_mpn_mod_1c pre ("mpn_mod_1c"); notdead += mpn_mod_1c (xp, size, CNST_LIMB(123), CNST_LIMB(122)); post (); #endif #if GMP_NUMB_BITS % 4 == 0 pre ("mpn_mod_34lsub1"); notdead += mpn_mod_34lsub1 (xp, size); post (); #endif pre ("mpn_modexact_1_odd"); notdead += mpn_modexact_1_odd (xp, size, CNST_LIMB(123)); post (); pre ("mpn_modexact_1c_odd"); notdead += mpn_modexact_1c_odd (xp, size, CNST_LIMB(123), CNST_LIMB(456)); post (); pre ("mpn_mul_1"); mpn_mul_1 (wp, xp, size, yp[0]); post (); #if HAVE_NATIVE_mpn_mul_1c pre ("mpn_mul_1c"); mpn_mul_1c (wp, xp, size, yp[0], CNST_LIMB(0)); post (); #endif #if HAVE_NATIVE_mpn_mul_2 pre ("mpn_mul_2"); mpn_mul_2 (wp, xp, size-1, yp); post (); #endif pre ("mpn_mul_basecase"); mpn_mul_basecase (wp, xp, (mp_size_t) 3, yp, (mp_size_t) 3); post (); #if HAVE_NATIVE_mpn_nand_n pre ("mpn_nand_n"); mpn_nand_n (wp, xp, yp, size); post (); #endif #if HAVE_NATIVE_mpn_nior_n pre ("mpn_nior_n"); mpn_nior_n (wp, xp, yp, size); post (); #endif pre ("mpn_popcount"); notdead += mpn_popcount (xp, size); post (); pre ("mpn_preinv_mod_1"); notdead += mpn_preinv_mod_1 (xp, size, GMP_NUMB_MAX, refmpn_invert_limb (GMP_NUMB_MAX)); post (); #if USE_PREINV_DIVREM_1 || HAVE_NATIVE_mpn_preinv_divrem_1 pre ("mpn_preinv_divrem_1"); mpn_preinv_divrem_1 (wp, (mp_size_t) 0, xp, size, GMP_NUMB_MAX, refmpn_invert_limb (GMP_NUMB_MAX), 0); post (); #endif #if HAVE_NATIVE_mpn_rsh1add_n pre ("mpn_rsh1add_n"); mpn_rsh1add_n (wp, xp, yp, size); post (); #endif #if HAVE_NATIVE_mpn_rsh1sub_n pre ("mpn_rsh1sub_n"); mpn_rsh1sub_n (wp, xp, yp, size); post (); #endif pre ("mpn_rshift"); mpn_rshift (wp, xp, size, 1); post (); pre ("mpn_sqr_basecase"); mpn_sqr_basecase (wp, xp, (mp_size_t) 3); post (); pre ("mpn_submul_1"); mpn_submul_1 (wp, xp, size, yp[0]); post (); #if HAVE_NATIVE_mpn_submul_1c pre ("mpn_submul_1c"); mpn_submul_1c (wp, xp, size, yp[0], CNST_LIMB(0)); post (); #endif pre 
("mpn_sub_n"); mpn_sub_n (wp, xp, yp, size); post (); #if HAVE_NATIVE_mpn_sub_nc pre ("mpn_sub_nc"); mpn_sub_nc (wp, xp, yp, size, CNST_LIMB(0)); post (); #endif #if HAVE_NATIVE_mpn_sublsh1_n pre ("mpn_sublsh1_n"); mpn_sublsh1_n (wp, xp, yp, size); post (); #endif #if HAVE_NATIVE_mpn_udiv_qrnnd pre ("mpn_udiv_qrnnd"); mpn_udiv_qrnnd (&wp[0], CNST_LIMB(122), xp[0], CNST_LIMB(123)); post (); #endif #if HAVE_NATIVE_mpn_udiv_qrnnd_r pre ("mpn_udiv_qrnnd_r"); mpn_udiv_qrnnd (CNST_LIMB(122), xp[0], CNST_LIMB(123), &wp[0]); post (); #endif #if HAVE_NATIVE_mpn_umul_ppmm pre ("mpn_umul_ppmm"); mpn_umul_ppmm (&wp[0], xp[0], yp[0]); post (); #endif #if HAVE_NATIVE_mpn_umul_ppmm_r pre ("mpn_umul_ppmm_r"); mpn_umul_ppmm_r (&wp[0], xp[0], yp[0]); post (); #endif #if HAVE_NATIVE_mpn_xor_n pre ("mpn_xor_n"); mpn_xor_n (wp, xp, yp, size); post (); #endif #if HAVE_NATIVE_mpn_xnor_n pre ("mpn_xnor_n"); mpn_xnor_n (wp, xp, yp, size); post (); #endif }
/* mpn_mul_basecase -- schoolbook multiply: {rp, un+vn} = {up, un} * {vp, vn}.
   Requires un >= vn >= 1 and rp not overlapping either source.
   Extended variant using native mpn_addmul_{2..6} when available.
   NOTE(review): guards here use #ifdef while the rest of the tree uses
   #if HAVE_NATIVE_...; the two differ if the macro is ever defined to 0 --
   confirm against the project's config convention.  */
void mpn_mul_basecase (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
{
  ASSERT (un >= vn);
  ASSERT (vn >= 1);
  ASSERT (! MPN_OVERLAP_P (rp, un+vn, up, un));
  ASSERT (! MPN_OVERLAP_P (rp, un+vn, vp, vn));

  /* We first multiply by the low order limb (or depending on optional
     function availability, limbs).  This result can be stored, not added,
     to rp.  We also avoid a loop for zeroing this way. */
#ifdef HAVE_NATIVE_mpn_mul_2
  if (vn >= 2)
    {
      rp[un + 1] = mpn_mul_2 (rp, up, un, vp);
      rp += 2, vp += 2, vn -= 2;
    }
  else
    {
      /* vn == 1: a single mul_1 produces the whole product.  */
      rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
      return;
    }
#else
  rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
  rp += 1, vp += 1, vn -= 1;
#endif

  /* Now accumulate the product of up[] and the next higher limb (or
     depending on optional function availability, limbs) from vp[].

     MAX_LEFT is a compile-time upper bound on how many vp limbs can remain
     when each loop is entered; the constant comparisons below let the
     compiler fold later loops into straight-line ifs or drop them.  */
#define MAX_LEFT MP_SIZE_T_MAX  /* Used to simplify loops into if statements */

#ifdef HAVE_NATIVE_mpn_addmul_6
  while (vn >= 6)
    {
      rp[un + 6 - 1] = mpn_addmul_6 (rp, up, un, vp);
      if (MAX_LEFT == 6)
        return;                 /* exactly 6 were left: done */
      rp += 6, vp += 6, vn -= 6;
      if (MAX_LEFT < 2 * 6)
        break;                  /* fewer than 6 can remain: fall through */
    }
#undef MAX_LEFT
#define MAX_LEFT (6 - 1)
#endif

#ifdef HAVE_NATIVE_mpn_addmul_5
  while (vn >= 5)
    {
      rp[un + 5 - 1] = mpn_addmul_5 (rp, up, un, vp);
      if (MAX_LEFT == 5)
        return;
      rp += 5, vp += 5, vn -= 5;
      if (MAX_LEFT < 2 * 5)
        break;
    }
#undef MAX_LEFT
#define MAX_LEFT (5 - 1)
#endif

#ifdef HAVE_NATIVE_mpn_addmul_4
  while (vn >= 4)
    {
      rp[un + 4 - 1] = mpn_addmul_4 (rp, up, un, vp);
      if (MAX_LEFT == 4)
        return;
      rp += 4, vp += 4, vn -= 4;
      if (MAX_LEFT < 2 * 4)
        break;
    }
#undef MAX_LEFT
#define MAX_LEFT (4 - 1)
#endif

#ifdef HAVE_NATIVE_mpn_addmul_3
  while (vn >= 3)
    {
      rp[un + 3 - 1] = mpn_addmul_3 (rp, up, un, vp);
      if (MAX_LEFT == 3)
        return;
      rp += 3, vp += 3, vn -= 3;
      if (MAX_LEFT < 2 * 3)
        break;
    }
#undef MAX_LEFT
#define MAX_LEFT (3 - 1)
#endif

#ifdef HAVE_NATIVE_mpn_addmul_2
  while (vn >= 2)
    {
      rp[un + 2 - 1] = mpn_addmul_2 (rp, up, un, vp);
      if (MAX_LEFT == 2)
        return;
      rp += 2, vp += 2, vn -= 2;
      if (MAX_LEFT < 2 * 2)
        break;
    }
#undef MAX_LEFT
#define MAX_LEFT (2 - 1)
#endif

  /* Fallback: accumulate one limb of vp at a time.  */
  while (vn >= 1)
    {
      rp[un] = mpn_addmul_1 (rp, up, un, vp[0]);
      if (MAX_LEFT == 1)
        return;
      rp += 1, vp += 1, vn -= 1;
    }
}
/* (rp, 2n) = (xp, n)*(yp, n) / B^n
   Basecase truncated ("short") product: accumulates only the partial
   products x[i]*y[j] with i + j >= n - 2, giving the high half of the full
   product (plus two guard limbs below it) in {rp + n - 2, n + 2}.
   Limbs of rp below index n - 2 are not written.
   FIX in this revision: the HAVE_NATIVE_mpn_addmul_5 loop was missing the
   statement-terminating semicolon after the mpn_addmul_5 call, a syntax
   error whenever that configuration is compiled.  */
inline static void
mpn_mulshort_n_basecase (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
  mp_size_t i, k;

  ASSERT (n >= 3);  /* this restriction doesn't make a lot of sense in general */
  ASSERT_MPN (xp, n);
  ASSERT_MPN (yp, n);
  ASSERT (!MPN_OVERLAP_P (rp, 2 * n, xp, n));
  ASSERT (!MPN_OVERLAP_P (rp, 2 * n, yp, n));

  k = n - 2; /* so want short product sum_(i + j >= k) x[i]y[j]B^(i + j) */
  i = 0;
  /* Multiply w limbs from y + i to (2 + i + w - 1) limbs from
     x + (n - 2 - i - w + 1) and put it into r + (n - 2 - w + 1),
     "overflow" (i.e. last) limb into r + (n + w - 1) for i between
     0 and n - 2.  i == n - w needs special treatment. */
  /* We first multiply by the low order limb (or depending on optional
     function availability, limbs).  This result can be stored, not added,
     to rp.  We also avoid a loop for zeroing this way. */
#if HAVE_NATIVE_mpn_mul_2
  rp[n + 1] = mpn_mul_2 (rp + k - 1, xp + k - 1, 2 + 1, yp);
  i += 2;
#else
  rp[n] = mpn_mul_1 (rp + k, xp + k, 2, yp[0]);
  i += 1;
#endif

#if HAVE_NATIVE_mpn_addmul_6
  while (i < n - 6)
    {
      rp[n + i + 6 - 1] = mpn_addmul_6 (rp + k - 6 + 1, xp + k - i - 6 + 1,
                                        2 + i + 6 - 1, yp + i);
      i += 6;
    }
  if (i == n - 6)
    {
      /* Final full-width pass: multiply all n limbs of x.  */
      rp[n + n - 1] = mpn_addmul_6 (rp + i, xp, n, yp + i);
      return;
    }
#endif

#if HAVE_NATIVE_mpn_addmul_5
  while (i < n - 5)
    {
      /* FIX: semicolon was missing after this call.  */
      rp[n + i + 5 - 1] = mpn_addmul_5 (rp + k - 5 + 1, xp + k - i - 5 + 1,
                                        2 + i + 5 - 1, yp + i);
      i += 5;
    }
  if (i == n - 5)
    {
      rp[n + n - 1] = mpn_addmul_5 (rp + i, xp, n, yp + i);
      return;
    }
#endif

#if HAVE_NATIVE_mpn_addmul_4
  while (i < n - 4)
    {
      rp[n + i + 4 - 1] = mpn_addmul_4 (rp + k - 4 + 1, xp + k - i - 4 + 1,
                                        2 + i + 4 - 1, yp + i);
      i += 4;
    }
  if (i == n - 4)
    {
      rp[n + n - 1] = mpn_addmul_4 (rp + i, xp, n, yp + i);
      return;
    }
#endif

#if HAVE_NATIVE_mpn_addmul_3
  while (i < n - 3)
    {
      rp[n + i + 3 - 1] = mpn_addmul_3 (rp + k - 3 + 1, xp + k - i - 3 + 1,
                                        2 + i + 3 - 1, yp + i);
      i += 3;
    }
  if (i == n - 3)
    {
      rp[n + n - 1] = mpn_addmul_3 (rp + i, xp, n, yp + i);
      return;
    }
#endif

#if HAVE_NATIVE_mpn_addmul_2
  while (i < n - 2)
    {
      rp[n + i + 2 - 1] = mpn_addmul_2 (rp + k - 2 + 1, xp + k - i - 2 + 1,
                                        2 + i + 2 - 1, yp + i);
      i += 2;
    }
  if (i == n - 2)
    {
      rp[n + n - 1] = mpn_addmul_2 (rp + i, xp, n, yp + i);
      return;
    }
#endif

  /* Fallback: one y limb at a time.  */
  while (i < n - 1)
    {
      rp[n + i] = mpn_addmul_1 (rp + k, xp + k - i, 2 + i, yp[i]);
      i += 1;
    }
  /* i == n - 1: final full-width pass.  */
  rp[n + n - 1] = mpn_addmul_1 (rp + i, xp, n, yp[i]);
  return;
}