void __fmpz_mul(fmpz_t res, const fmpz_t a, const fmpz_t b)
{
   long a0 = a[0];
   long b0 = b[0];
   unsigned long sizea = FLINT_ABS(a0);
   unsigned long sizeb = FLINT_ABS(b0);
   while ((!a[sizea]) && (sizea)) sizea--;
   while ((!b[sizeb]) && (sizeb)) sizeb--;

   mp_limb_t mslimb;
   fmpz_t temp;

   if ((sizea == 0) || (sizeb == 0))
   {
      res[0] = 0;
   }
   else if (sizea + sizeb < 100)
   {
      if (sizea >= sizeb) mslimb = mpn_mul(res+1, a+1, sizea, b+1, sizeb);
      else mslimb = mpn_mul(res+1, b+1, sizeb, a+1, sizea);
      res[0] = sizea + sizeb - (mslimb == 0);
      if ((long) (a[0] ^ b[0]) < 0) res[0] = -res[0];
   }
   else
   {
      if (sizea >= sizeb) mslimb = F_mpn_mul(res+1, a+1, sizea, b+1, sizeb);
      else mslimb = F_mpn_mul(res+1, b+1, sizeb, a+1, sizea);
      res[0] = sizea + sizeb - (mslimb == 0);
      if ((long) (a[0] ^ b[0]) < 0) res[0] = -res[0];
   }
}
void fmpz_addmul(fmpz_t res, const fmpz_t a, const fmpz_t b)
{
   long a0 = a[0];
   long b0 = b[0];
   unsigned long sizea = FLINT_ABS(a0);
   unsigned long sizeb = FLINT_ABS(b0);
   while ((!a[sizea]) && (sizea)) sizea--;
   while ((!b[sizeb]) && (sizeb)) sizeb--;

   fmpz_t temp;
   mp_limb_t mslimb;

   if (sizea && sizeb)
   {
      if (sizea + sizeb < 100)
      {
         temp = (fmpz_t) flint_stack_alloc_small(sizea + sizeb + 1);
         if (sizea >= sizeb) mslimb = mpn_mul(temp+1, a+1, sizea, b+1, sizeb);
         else mslimb = mpn_mul(temp+1, b+1, sizeb, a+1, sizea);
         temp[0] = sizea + sizeb - (mslimb == 0);
         if ((long) (a[0] ^ b[0]) < 0) temp[0] = -temp[0];
         fmpz_add(res, res, temp);
         flint_stack_release_small();
      }
      else
      {
         temp = (fmpz_t) flint_stack_alloc(sizea + sizeb + 1);
         if (sizea >= sizeb) mslimb = F_mpn_mul(temp+1, a+1, sizea, b+1, sizeb);
         else mslimb = F_mpn_mul(temp+1, b+1, sizeb, a+1, sizea);
         temp[0] = sizea + sizeb - (mslimb == 0);
         if ((long) (a[0] ^ b[0]) < 0) temp[0] = -temp[0];
         fmpz_add(res, res, temp);
         flint_stack_release();
      }
   }
}
mp_size_t mpn_prod_limbs_balanced(mp_limb_t * result, mp_limb_t * scratch,
                                  const mp_limb_t * factors, mp_size_t n, ulong bits)
{
    mp_size_t an, bn, alen, blen, len;
    mp_limb_t top;

    if (n < PROD_LIMBS_DIRECT_CUTOFF)
        return mpn_prod_limbs_direct(result, factors, n);

    an = n/2;
    bn = n - an;

    alen = mpn_prod_limbs_balanced(scratch, result, factors, an, bits);
    blen = mpn_prod_limbs_balanced(scratch + alen, result, factors + an, bn, bits);
    len = alen + blen;

    if (alen <= blen)
        top = mpn_mul(result, scratch + alen, blen, scratch, alen);
    else
        top = mpn_mul(result, scratch, alen, scratch + alen, blen);

    if (!top) len--;

    return len;
}
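/* The base case above, mpn_prod_limbs_direct, is not shown in this excerpt.
   A minimal sketch of what such a routine could look like, assuming it simply
   accumulates the single-limb factors with mpn_mul_1 (the actual FLINT
   implementation and the value of PROD_LIMBS_DIRECT_CUTOFF may differ): */
static mp_size_t mpn_prod_limbs_direct(mp_limb_t * result,
                                       const mp_limb_t * factors, mp_size_t n)
{
    mp_size_t i, len = 1;
    mp_limb_t top;

    result[0] = factors[0];
    for (i = 1; i < n; i++)
    {
        /* multiply the running product by the next one-limb factor */
        top = mpn_mul_1(result, result, len, factors[i]);
        if (top != 0)
            result[len++] = top;
    }
    return len;
}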
int test_F_mpn_mul_precache() { mp_limb_t * int1, * int2, * product, * product2; F_mpn_precache_t precache; mp_limb_t msl; int result = 1; unsigned long count; for (count = 0; (count < 30) && (result == 1); count++) { unsigned long limbs2 = randint(2*FLINT_FFT_LIMBS_CROSSOVER)+1; unsigned long limbs1 = randint(2*FLINT_FFT_LIMBS_CROSSOVER)+1; int1 = (mp_limb_t *) malloc(sizeof(mp_limb_t)*limbs1); mpn_random2(int1, limbs1); F_mpn_mul_precache_init(precache, int1, limbs1, limbs2); unsigned long count2; for (count2 = 0; (count2 < 30) && (result == 1); count2++) { #if DEBUG printf("%ld, %ld\n",limbs1, limbs2); #endif unsigned long limbs3 = randint(limbs2)+1; int2 = (mp_limb_t *) malloc(sizeof(mp_limb_t)*limbs3); product = (mp_limb_t *) malloc(sizeof(mp_limb_t)*(limbs1+limbs2)); product2 = (mp_limb_t *) malloc(sizeof(mp_limb_t)*(limbs1+limbs2)); F_mpn_clear(int2, limbs3); mpn_random2(int2, limbs3); F_mpn_mul_precache(product, int2, limbs3, precache); if (limbs1 > limbs3) msl = mpn_mul(product2, int1, limbs1, int2, limbs3); else msl = mpn_mul(product2, int2, limbs3, int1, limbs1); unsigned long j; for (j = 0; j < limbs1+limbs3 - (msl == 0); j++) { if (product[j] != product2[j]) result = 0; } free(product2); free(product); free(int2); } F_mpn_mul_precache_clear(precache); free(int1); } return result; }
int main () { mp_limb_t nptr[2 * SIZE]; mp_limb_t dptr[2 * SIZE]; mp_limb_t qptr[2 * SIZE]; mp_limb_t pptr[2 * SIZE + 1]; mp_limb_t rptr[2 * SIZE]; mp_size_t nsize, dsize, qsize, rsize, psize; int test; mp_limb_t qlimb; for (test = 0; ; test++) { printf ("%d\n", test); #ifdef RANDOM nsize = random () % (2 * SIZE) + 1; dsize = random () % nsize + 1; #else nsize = 2 * SIZE; dsize = SIZE; #endif mpn_random2 (nptr, nsize); mpn_random2 (dptr, dsize); dptr[dsize - 1] |= (mp_limb_t) 1 << (GMP_LIMB_BITS - 1); MPN_COPY (rptr, nptr, nsize); qlimb = mpn_divrem (qptr, (mp_size_t) 0, rptr, nsize, dptr, dsize); rsize = dsize; qsize = nsize - dsize; qptr[qsize] = qlimb; qsize += qlimb; if (qsize == 0 || qsize > 2 * SIZE) { continue; /* bogus */ } else { mp_limb_t cy; if (qsize > dsize) mpn_mul (pptr, qptr, qsize, dptr, dsize); else mpn_mul (pptr, dptr, dsize, qptr, qsize); psize = qsize + dsize; psize -= pptr[psize - 1] == 0; cy = mpn_add (pptr, pptr, psize, rptr, rsize); pptr[psize] = cy; psize += cy; } if (nsize != psize || mpn_cmp (nptr, pptr, nsize) != 0) abort (); } }
void check_one (mp_ptr qp, mp_srcptr rp, mp_limb_t rh, mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, const char *fname) { mp_size_t qn; int cmp; mp_ptr tp; mp_limb_t cy = 4711; /* silence warnings */ TMP_DECL; qn = nn - dn; if (qn == 0) return; TMP_MARK; tp = TMP_ALLOC_LIMBS (nn + 1); if (dn >= qn) mpn_mul (tp, dp, dn, qp, qn); else mpn_mul (tp, qp, qn, dp, dn); if (rp != NULL) { cy = mpn_add_n (tp + qn, tp + qn, rp, dn); cmp = cy != rh || mpn_cmp (tp, np, nn) != 0; } else cmp = mpn_cmp (tp, np, nn - dn) != 0; if (cmp != 0) { printf ("\r*******************************************************************************\n"); printf ("%s inconsistent in test %lu\n", fname, test); printf ("N= "); dumpy (np, nn); printf ("D= "); dumpy (dp, dn); printf ("Q= "); dumpy (qp, qn); if (rp != NULL) { printf ("R= "); dumpy (rp, dn); printf ("Rb= %d, Cy=%d\n", (int) cy, (int) rh); } printf ("T= "); dumpy (tp, nn); printf ("nn = %ld, dn = %ld, qn = %ld", nn, dn, qn); printf ("\n*******************************************************************************\n"); abort (); } TMP_FREE; }
void fmpz_mul(fmpz_t res, const fmpz_t a, const fmpz_t b) { long a0 = a[0]; long b0 = b[0]; unsigned long sizea = FLINT_ABS(a0); unsigned long sizeb = FLINT_ABS(b0); while ((!a[sizea]) && (sizea)) sizea--; while ((!b[sizeb]) && (sizeb)) sizeb--; mp_limb_t mslimb; fmpz_t temp; if ((sizea == 0) || (sizeb == 0)) { res[0] = 0; } else if (sizea + sizeb < 100) { temp = (fmpz_t) flint_stack_alloc_small(sizea + sizeb + 1); if (sizea > sizeb) mslimb = mpn_mul(temp+1, a+1, sizea, b+1, sizeb); else if (sizea == sizeb) { mpn_mul_n(temp+1, a+1, b+1, sizeb); mslimb = temp[2*sizeb]; } else mslimb = mpn_mul(temp+1, b+1, sizeb, a+1, sizea); temp[0] = sizea + sizeb - (mslimb == 0); F_mpn_copy(res, temp, temp[0]+1); if ((long) (a0 ^ b0) < 0) res[0] = -res[0]; flint_stack_release_small(); } else if (sizea + sizeb < 2*FLINT_FFT_LIMBS_CROSSOVER) { temp = (fmpz_t) flint_stack_alloc(sizea + sizeb + 1); if (sizea > sizeb) mslimb = mpn_mul(temp+1, a+1, sizea, b+1, sizeb); else if (sizea == sizeb) { mpn_mul_n(temp+1, a+1, b+1, sizeb); mslimb = temp[2*sizeb]; } else mslimb = mpn_mul(temp+1, b+1, sizeb, a+1, sizea); temp[0] = sizea + sizeb - (mslimb == 0); F_mpn_copy(res, temp, temp[0]+1); if ((long) (a0 ^ b0) < 0) res[0] = -res[0]; flint_stack_release(); } else { if (sizea >= sizeb) mslimb = F_mpn_mul(res+1, a+1, sizea, b+1, sizeb); else mslimb = F_mpn_mul(res+1, b+1, sizeb, a+1, sizea); res[0] = sizea+sizeb - (mslimb == 0); if ((long) (a0 ^ b0) < 0) res[0] = -res[0]; } }
/* Check divide and conquer hensel division routine. */ void check_dc_bdiv_q (void) { mp_limb_t np[2*MAX_LIMBS]; mp_limb_t np2[2*MAX_LIMBS]; mp_limb_t rp[3*MAX_LIMBS]; mp_limb_t dp[MAX_LIMBS]; mp_limb_t qp[2*MAX_LIMBS]; mp_limb_t dip; mp_size_t nn, rn, dn, qn; gmp_randstate_t rands; int i, j, s; gmp_randinit_default(rands); for (i = 0; i < ITERS; i++) { dn = (random() % (MAX_LIMBS - 5)) + 6; nn = (random() % MAX_LIMBS) + dn; mpn_rrandom (np, rands, nn); mpn_rrandom (dp, rands, dn); dp[0] |= 1; MPN_COPY(np2, np, nn); modlimb_invert(dip, dp[0]); mpn_dc_bdiv_q(qp, np, nn, dp, dn, dip); if (nn >= dn) mpn_mul(rp, qp, nn, dp, dn); else mpn_mul(rp, dp, dn, qp, nn); if (mpn_cmp(rp, np2, nn) != 0) { printf("failed: quotient wrong!\n"); printf ("nn = %lu, dn = %lu\n\n", nn, dn); gmp_printf (" np: %Nx\n\n", np2, nn); gmp_printf (" dp: %Nx\n\n", dp, dn); gmp_printf (" qp: %Nx\n\n", qp, nn); gmp_printf (" rp: %Nx\n\n", rp, nn); abort (); } } gmp_randclear(rands); }
/* Computes (r;b) = (a; b) M. Result is of size n + M->n +/- 1, and the size is returned (if inputs are non-normalized, result may be non-normalized too). Temporary space needed is M->n + n. */ static size_t hgcd_mul_matrix_vector (struct hgcd_matrix *M, mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp) { mp_limb_t ah, bh; /* Compute (r,b) <-- (u00 a + u10 b, u01 a + u11 b) as t = u00 * a r = u10 * b r += t; t = u11 * b b = u01 * a b += t; */ if (M->n >= n) { mpn_mul (tp, M->p[0][0], M->n, ap, n); mpn_mul (rp, M->p[1][0], M->n, bp, n); } else { mpn_mul (tp, ap, n, M->p[0][0], M->n); mpn_mul (rp, bp, n, M->p[1][0], M->n); } ah = mpn_add_n (rp, rp, tp, n + M->n); if (M->n >= n) { mpn_mul (tp, M->p[1][1], M->n, bp, n); mpn_mul (bp, M->p[0][1], M->n, ap, n); } else { mpn_mul (tp, bp, n, M->p[1][1], M->n); mpn_mul (bp, ap, n, M->p[0][1], M->n); } bh = mpn_add_n (bp, bp, tp, n + M->n); n += M->n; if ( (ah | bh) > 0) { rp[n] = ah; bp[n] = bh; n++; } else { /* Normalize */ while ( (rp[n-1] | bp[n-1]) == 0) n--; } return n; }
long _fmpr_mul_mpn(fmpr_t z,
    mp_srcptr xman, mp_size_t xn, const fmpz_t xexp,
    mp_srcptr yman, mp_size_t yn, const fmpz_t yexp,
    int negative, long prec, fmpr_rnd_t rnd)
{
    long zn, alloc, ret, shift;
    mp_limb_t tmp_stack[MUL_STACK_ALLOC];
    mp_ptr tmp;

    zn = xn + yn;
    alloc = zn;

    MUL_TMP_ALLOC

    if (yn == 1)
    {
        mp_limb_t cy = mpn_mul_1(tmp, xman, xn, yman[0]);
        tmp[zn - 1] = cy;
        zn = zn - (cy == 0);
    }
    else
    {
        mpn_mul(tmp, xman, xn, yman, yn);
        zn = zn - (tmp[zn - 1] == 0);
    }

    ret = _fmpr_set_round_mpn(&shift, fmpr_manref(z), tmp, zn, negative, prec, rnd);
    fmpz_add2_fmpz_si_inline(fmpr_expref(z), xexp, yexp, shift);

    MUL_TMP_FREE

    return ret;
}
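/* MUL_TMP_ALLOC and MUL_TMP_FREE are not defined in this excerpt.  A
   plausible definition (an assumption, not the library's actual code) keeps
   products of up to MUL_STACK_ALLOC limbs in the on-stack buffer and falls
   back to the heap otherwise; flint_malloc/flint_free are assumed from FLINT: */
#define MUL_TMP_ALLOC \
    if (alloc <= MUL_STACK_ALLOC) \
        tmp = tmp_stack; \
    else \
        tmp = flint_malloc(sizeof(mp_limb_t) * alloc);

#define MUL_TMP_FREE \
    if (alloc > MUL_STACK_ALLOC) \
        flint_free(tmp);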
/* Computes R -= A * B. Result must be non-negative. Normalized down to
   size an, and resulting size is returned. */
static mp_size_t
submul (mp_ptr rp, mp_size_t rn,
        mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
{
  mp_ptr tp;
  TMP_DECL;

  ASSERT (bn > 0);
  ASSERT (an >= bn);
  ASSERT (rn >= an);
  ASSERT (an + bn <= rn + 1);

  TMP_MARK;
  tp = TMP_ALLOC_LIMBS (an + bn);

  mpn_mul (tp, ap, an, bp, bn);
  if (an + bn > rn)
    {
      ASSERT (tp[rn] == 0);
      bn--;
    }
  ASSERT_NOCARRY (mpn_sub (rp, rp, rn, tp, an + bn));
  TMP_FREE;

  while (rn > an && (rp[rn-1] == 0))
    rn--;

  return rn;
}
void mpfq_p_127_735_field_init(mpfq_p_127_735_dst_field k, mp_limb_t *p) { k->p = (mp_limb_t *)malloc(2*sizeof(mp_limb_t)); k->bigmul_p = (mp_limb_t *)malloc(5*sizeof(mp_limb_t)); if ((!k->p) || (!k->bigmul_p)) MALLOC_FAILED(); { int i; k->p[0] = -735UL; for (i = 1; i < (2-1); ++i) k->p[i] = -1UL; k->p[2-1] = (-1UL) >> 1; // 2^(w-1) - 1 where w is 32 or 64 } k->kl = 2; k->url = 5; k->url_margin = LONG_MAX; k->type = CLASSICAL_REP; mpz_init(k->factor); // precompute bigmul_p = largest multiple of p that fits in an elt_ur // p*Floor( (2^(5*GMP_LIMB_BITS)-1)/p ) { mpfq_p_127_735_elt_ur big; mp_limb_t q[5-2+1], r[2], tmp[5+1]; int i; for (i = 0; i < 5; ++i) big[i] = ~0UL; mpn_tdiv_qr(q, r, 0, big, 5, k->p, 2); mpn_mul(tmp, q, 5-2+1, k->p, 2); for (i = 0; i < 5; ++i) (k->bigmul_p)[i] = tmp[i]; assert (tmp[5] == 0UL); } }
mp_limb_t
mpn_dcpi1_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
                    gmp_pi1_t *dinv, mp_ptr tp)
{
  mp_size_t lo, hi;
  mp_limb_t cy, qh, ql;

  lo = n >> 1;          /* floor(n/2) */
  hi = n - lo;          /* ceil(n/2) */

  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
    qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
  else
    qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);

  mpn_mul (tp, qp + lo, hi, dp, lo);

  cy = mpn_sub_n (np + lo, np + lo, tp, n);
  if (qh != 0)
    cy += mpn_sub_n (np + n, np + n, dp, lo);

  while (cy != 0)
    {
      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
      cy -= mpn_add_n (np + lo, np + lo, dp, n);
    }

  if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD))
    ql = mpn_sbpi1_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
  else
    ql = mpn_dcpi1_div_qr_n (qp, np + hi, dp + hi, lo, dinv, tp);

  mpn_mul (tp, dp, hi, qp, lo);

  cy = mpn_sub_n (np, np, tp, n);
  if (ql != 0)
    cy += mpn_sub_n (np + lo, np + lo, dp, hi);

  while (cy != 0)
    {
      mpn_sub_1 (qp, qp, lo, 1);
      cy -= mpn_add_n (np, np, dp, n);
    }

  return qh;
}
void gmp_wrap_sb_mul(char * n1l, char * n2l, char * resl)
{
    WORD_PTR_TYPE n1 = TO_WORD_PTR(n1l);
    mp_size_t l1 = BIGNUM_LENGTH(n1);
    WORD_PTR_TYPE n2 = TO_WORD_PTR(n2l);
    mp_size_t l2 = BIGNUM_LENGTH(n2);
    WORD_PTR_TYPE res = TO_WORD_PTR(resl);
    mpn_mul(res, n1, l1, n2, l2);
}
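/* Note that mpn_mul requires its first operand to be at least as long as its
   second, so the wrapper above presumably relies on callers guaranteeing
   l1 >= l2.  A small standalone helper (hypothetical name, not part of the
   wrapper's API) that makes the ordering explicit: */
static mp_limb_t mul_any_order(mp_ptr rp, mp_srcptr ap, mp_size_t an,
                               mp_srcptr bp, mp_size_t bn)
{
    /* mpn_mul wants the longer operand first; rp must have an + bn limbs
       and must not overlap the inputs.  Returns the most significant limb. */
    return (an >= bn) ? mpn_mul(rp, ap, an, bp, bn)
                      : mpn_mul(rp, bp, bn, ap, an);
}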
void mpn_binvert (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr scratch) { mp_ptr xp; mp_size_t rn, newrn; mp_size_t sizes[NPOWS], *sizp; mp_limb_t di; /* Compute the computation precisions from highest to lowest, leaving the base case size in 'rn'. */ sizp = sizes; for (rn = n; ABOVE_THRESHOLD (rn, BINV_NEWTON_THRESHOLD); rn = (rn + 1) >> 1) *sizp++ = rn; xp = scratch; /* Compute a base value using a low-overhead O(n^2) algorithm. FIXME: We should call some divide-and-conquer lsb division function here for an operand subrange. */ MPN_ZERO (xp, rn); xp[0] = 1; binvert_limb (di, up[0]); if (BELOW_THRESHOLD (rn, DC_BDIV_Q_THRESHOLD)) mpn_sb_bdiv_q (rp, xp, rn, up, rn, -di); else mpn_dc_bdiv_q (rp, xp, rn, up, rn, -di); /* Use Newton iterations to get the desired precision. */ for (; rn < n; rn = newrn) { newrn = *--sizp; #if WANT_FFT if (ABOVE_THRESHOLD (newrn, 2 * MUL_FFT_MODF_THRESHOLD)) { int k; mp_size_t m, i; k = mpn_fft_best_k (newrn, 0); m = mpn_fft_next_size (newrn, k); mpn_mul_fft (xp, m, up, newrn, rp, rn, k); for (i = rn - 1; i >= 0; i--) if (xp[i] > (i == 0)) { mpn_add_1 (xp + rn, xp + rn, newrn - rn, 1); break; } } else #endif mpn_mul (xp, up, newrn, rp, rn); mpn_mullow_n (rp + rn, rp, xp + rn, newrn - rn); mpn_neg_n (rp + rn, rp + rn, newrn - rn); } }
/* Set (u0, u1) = (u0, u1) M Requires temporary space un + un + M->n = 2*un + M->n */ void ngcdext_cofactor_adjust(mp_ptr u0, mp_ptr u1, mp_size_t * un, struct ngcd_matrix *M, mp_ptr tp) { /* Let M = (r00, r01) (r10, r11) We want u0 = u0 * r00 + u1 * r10 u1 = u0 * r01 + u1 * r11 We make a copy of u0 at tp and update u0 first */ mp_limb_t cy, cy2; mp_ptr t2p =(tp + (*un)); /* second temporary space */ ASSERT(tp > M->p[1][1] + M->n); MPN_COPY(tp, u0, *un); if (M->n >= (*un)) { mpn_mul(t2p, M->p[1][0], M->n, u1, *un); /* t2p = r10 * u1 */ mpn_mul(u0, M->p[0][0], M->n, tp, *un); /* u0 = r00 * u0 */ } else { mpn_mul(t2p, u1, *un, M->p[1][0], M->n); mpn_mul(u0, tp, *un, M->p[0][0], M->n); } cy = mpn_add_n(u0, u0, t2p, M->n + (*un)); /* u0 += t2p */ if (M->n >= (*un)) { mpn_mul(t2p, M->p[1][1], M->n, u1, *un); /* t2p = r11 * u1 */ mpn_mul(u1, M->p[0][1], M->n, tp, *un); /* u1 = r01 * u0 */ } else { mpn_mul(t2p, u1, *un, M->p[1][1], M->n); mpn_mul(u1, tp, *un, M->p[0][1], M->n); } cy2 = mpn_add_n(u1, u1, t2p, M->n + (*un)); /* u1 += t2p */ if ((cy) || (cy2)) /* normalise u0, u1 */ { u0[M->n + (*un)] = cy; u1[M->n + (*un)] = cy2; (*un) += (M->n + 1); } else { (*un) += M->n; while ((u0[*un - 1] == 0) && (u1[*un - 1] == 0)) (*un)--; /* both cannot be zero, so this won't overrun */ } }
int test_F_mpn_mul() { mp_limb_t * int1, * int2, * product, * product2; mp_limb_t msl, msl2; int result = 1; unsigned long count; for (count = 0; (count < 30) && (result == 1); count++) { unsigned long limbs2 = randint(2*FLINT_FFT_LIMBS_CROSSOVER)+1; unsigned long limbs1 = limbs2 + randint(1000); int1 = (mp_limb_t *) malloc(sizeof(mp_limb_t)*limbs1); mpn_random2(int1, limbs1); unsigned long count2; for (count2 = 0; (count2 < 30) && (result == 1); count2++) { #if DEBUG printf("%ld, %ld\n",limbs1, limbs2); #endif int2 = (mp_limb_t *) malloc(sizeof(mp_limb_t)*limbs2); product = (mp_limb_t *) malloc(sizeof(mp_limb_t)*(limbs1+limbs2)); product2 = (mp_limb_t *) malloc(sizeof(mp_limb_t)*(limbs1+limbs2)); F_mpn_clear(int2, limbs2); mpn_random2(int2, randint(limbs2-1)+1); msl = F_mpn_mul(product, int1, limbs1, int2, limbs2); msl2 = mpn_mul(product2, int1, limbs1, int2, limbs2); unsigned long j; for (j = 0; j < limbs1+limbs2 - (msl == 0); j++) { if (product[j] != product2[j]) result = 0; } result &= (msl == msl2); free(product2); free(product); free(int2); } free(int1); } return result; }
void gcdext_get_t(mp_ptr t, mp_size_t * tn, mp_ptr gp, mp_size_t gn, mp_ptr ap,
                  mp_size_t an, mp_ptr bp, mp_size_t n, mp_ptr s, mp_size_t sn, mp_ptr tp)
{
   mp_size_t ss = ABS(sn);
   mp_limb_t cy;

   if (ss >= an)
      mpn_mul(tp, s, ss, ap, an);
   else
      mpn_mul(tp, ap, an, s, ss);

   (*tn) = ss + an;
   (*tn) -= (tp[(*tn) - 1] == 0);

   /* We must have s*ap >= gp and we really want to compute -t */
   if (sn > 0)
   {
      mpn_sub(tp, tp, *tn, gp, gn);
      MPN_NORMALIZE(tp, (*tn));
   }
   else
   {
      cy = mpn_add(tp, tp, *tn, gp, gn);
      if (cy) tp[(*tn)++] = cy;
   }

   if ((*tn) == 0)
   {
      return;
   }

   mpn_tdiv_qr(t, tp, 0, tp, (*tn), bp, n);
   ASSERT_MPN_ZERO_P(tp, n);

   (*tn) -= (n - 1);
   (*tn) -= (t[(*tn) - 1] == 0);
}
void
my_mpz_mul (mpz_t r, mpz_srcptr a, mpz_srcptr b)
{
  mp_limb_t *tp;
  mp_size_t tn, an, bn;

  an = mpz_size (a);
  bn = mpz_size (b);
  assert (an > 0);
  assert (bn > 0);

  tn = an + bn;
  tp = mpz_limbs_write (r, tn);

  if (an > bn)
    mpn_mul (tp, mpz_limbs_read (a), an, mpz_limbs_read (b), bn);
  else
    mpn_mul (tp, mpz_limbs_read (b), bn, mpz_limbs_read (a), an);

  if (mpz_sgn (a) != mpz_sgn (b))
    tn = - tn;

  mpz_limbs_finish (r, tn);
}
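/* A short standalone usage example for my_mpz_mul, assuming GMP 6.x for the
   mpz_limbs interface used above.  Both operands must be nonzero, as the
   asserts require: */
int main(void)
{
    mpz_t a, b, r;

    mpz_init_set_ui(a, 123456789);
    mpz_init_set_si(b, -987654321);
    mpz_init(r);

    my_mpz_mul(r, a, b);
    gmp_printf("%Zd\n", r);    /* prints -121932631112635269 */

    mpz_clears(a, b, r, NULL);
    return 0;
}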
static void ref_matrix22_mul (struct matrix *R, const struct matrix *A, const struct matrix *B, mp_ptr tp) { mp_size_t an, bn, n; mp_ptr r00, r01, r10, r11, a00, a01, a10, a11, b00, b01, b10, b11; if (A->n >= B->n) { r00 = R->e00; a00 = A->e00; b00 = B->e00; r01 = R->e01; a01 = A->e01; b01 = B->e01; r10 = R->e10; a10 = A->e10; b10 = B->e10; r11 = R->e11; a11 = A->e11; b11 = B->e11; an = A->n, bn = B->n; } else { /* Transpose */ r00 = R->e00; a00 = B->e00; b00 = A->e00; r01 = R->e10; a01 = B->e10; b01 = A->e10; r10 = R->e01; a10 = B->e01; b10 = A->e01; r11 = R->e11; a11 = B->e11; b11 = A->e11; an = B->n, bn = A->n; } n = an + bn; R->n = n + 1; mpn_mul (r00, a00, an, b00, bn); mpn_mul (tp, a01, an, b10, bn); r00[n] = mpn_add_n (r00, r00, tp, n); mpn_mul (r01, a00, an, b01, bn); mpn_mul (tp, a01, an, b11, bn); r01[n] = mpn_add_n (r01, r01, tp, n); mpn_mul (r10, a10, an, b00, bn); mpn_mul (tp, a11, an, b10, bn); r10[n] = mpn_add_n (r10, r10, tp, n); mpn_mul (r11, a10, an, b01, bn); mpn_mul (tp, a11, an, b11, bn); r11[n] = mpn_add_n (r11, r11, tp, n); }
void mpn_matrix22_mul (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn, mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn, mp_ptr tp) { if (BELOW_THRESHOLD (rn, MATRIX22_STRASSEN_THRESHOLD) || BELOW_THRESHOLD (mn, MATRIX22_STRASSEN_THRESHOLD)) { mp_ptr p0, p1; unsigned i; /* Temporary storage: 3 rn + 2 mn */ p0 = tp + rn; p1 = p0 + rn + mn; for (i = 0; i < 2; i++) { MPN_COPY (tp, r0, rn); if (rn >= mn) { mpn_mul (p0, r0, rn, m0, mn); mpn_mul (p1, r1, rn, m3, mn); mpn_mul (r0, r1, rn, m2, mn); mpn_mul (r1, tp, rn, m1, mn); } else { mpn_mul (p0, m0, mn, r0, rn); mpn_mul (p1, m3, mn, r1, rn); mpn_mul (r0, m2, mn, r1, rn); mpn_mul (r1, m1, mn, tp, rn); } r0[rn+mn] = mpn_add_n (r0, r0, p0, rn + mn); r1[rn+mn] = mpn_add_n (r1, r1, p1, rn + mn); r0 = r2; r1 = r3; } } else mpn_matrix22_mul_strassen (r0, r1, r2, r3, rn, m0, m1, m2, m3, mn, tp); }
void _nmod_poly_mul_KS(mp_ptr out, mp_srcptr in1, long len1,
                       mp_srcptr in2, long len2, mp_bitcnt_t bits, nmod_t mod)
{
    long len_out = len1 + len2 - 1, limbs1, limbs2;
    mp_ptr mpn1, mpn2, res;

    if (bits == 0)
    {
        mp_bitcnt_t bits1, bits2, loglen;
        bits1 = _nmod_vec_max_bits(in1, len1);
        bits2 = (in1 == in2) ? bits1 : _nmod_vec_max_bits(in2, len2);
        loglen = FLINT_BIT_COUNT(len2);
        bits = bits1 + bits2 + loglen;
    }

    limbs1 = (len1 * bits - 1) / FLINT_BITS + 1;
    limbs2 = (len2 * bits - 1) / FLINT_BITS + 1;

    mpn1 = (mp_ptr) malloc(sizeof(mp_limb_t) * limbs1);
    mpn2 = (in1 == in2) ? mpn1 : (mp_ptr) malloc(sizeof(mp_limb_t) * limbs2);

    _nmod_poly_bit_pack(mpn1, in1, len1, bits);
    if (in1 != in2)
        _nmod_poly_bit_pack(mpn2, in2, len2, bits);

    res = (mp_ptr) malloc(sizeof(mp_limb_t) * (limbs1 + limbs2));

    if (in1 != in2)
        mpn_mul(res, mpn1, limbs1, mpn2, limbs2);
    else
        mpn_mul_n(res, mpn1, mpn1, limbs1);

    _nmod_poly_bit_unpack(out, len_out, res, bits, mod);

    free(mpn2);
    if (in1 != in2)
        free(mpn1);
    free(res);
}
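/* A toy illustration of the Kronecker substitution performed above, using a
   plain machine word instead of mpn arrays: pack the coefficients into bit
   fields wide enough that no column can overflow, do one big multiplication,
   unpack, then reduce mod p.  Here (1 + 2x)(3 + 4x) is computed over Z/7Z
   with 8-bit slots (a self-contained example, not FLINT code): */
#include <stdio.h>

int main(void)
{
    unsigned long a = 1 | (2UL << 8);      /* packs 1 + 2x  */
    unsigned long b = 3 | (4UL << 8);      /* packs 3 + 4x  */
    unsigned long p = a * b;               /* one integer multiplication */

    unsigned long c0 = p & 0xff;           /* 3  */
    unsigned long c1 = (p >> 8) & 0xff;    /* 10 */
    unsigned long c2 = (p >> 16) & 0xff;   /* 8  */

    /* (1 + 2x)(3 + 4x) = 3 + 10x + 8x^2, i.e. 3 + 3x + x^2 mod 7 */
    printf("%lu %lu %lu\n", c0 % 7, c1 % 7, c2 % 7);
    return 0;
}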
mp_limb_t
mpn_dc_divappr_q_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
                    mp_srcptr dip, mp_ptr tp)
{
  mp_size_t lo, hi;
  mp_limb_t cy, qh, ql;

  lo = n >> 1;          /* floor(n/2) */
  hi = n - lo;          /* ceil(n/2) */

  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
    qh = mpn_sb_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dip);
  else
    qh = mpn_dc_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dip, tp);

  mpn_mul (tp, qp + lo, hi, dp, lo);

  cy = mpn_sub_n (np + lo, np + lo, tp, n);
  if (qh != 0)
    cy += mpn_sub_n (np + n, np + n, dp, lo);

  while (cy != 0)
    {
      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
      cy -= mpn_add_n (np + lo, np + lo, dp, n);
    }

  if (BELOW_THRESHOLD (lo, DC_DIVAPPR_Q_THRESHOLD))
    ql = mpn_sb_divappr_q (qp, np + hi, 2 * lo, dp + hi, lo, dip);
  else
    ql = mpn_dc_divappr_q_n (qp, np + hi, dp + hi, lo, dip, tp);

  if (UNLIKELY (ql != 0))
    {
      mp_size_t i;
      for (i = 0; i < lo; i++)
        qp[i] = GMP_NUMB_MASK;
    }

  return qh;
}
mp_limb_t mpn_dcpi1_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv) { mp_size_t qn; mp_limb_t qh, cy, qsave; mp_ptr tp; TMP_DECL; TMP_MARK; ASSERT (dn >= 6); ASSERT (nn > dn); ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT); qn = nn - dn; qp += qn; np += nn; dp += dn; if (qn >= dn) { qn++; /* pretend we'll need an extra limb */ /* Reduce qn mod dn without division, optimizing small operations. */ do qn -= dn; while (qn > dn); qp -= qn; /* point at low limb of next quotient block */ np -= qn; /* point in the middle of partial remainder */ tp = TMP_SALLOC_LIMBS (dn); /* Perform the typically smaller block first. */ if (qn == 1) { mp_limb_t q, n2, n1, n0, d1, d0; /* Handle qh up front, for simplicity. */ qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0; if (qh) ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn)); /* A single iteration of schoolbook: One 3/2 division, followed by the bignum update and adjustment. */ n2 = np[0]; n1 = np[-1]; n0 = np[-2]; d1 = dp[-1]; d0 = dp[-2]; ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0)); if (UNLIKELY (n2 == d1) && n1 == d0) { q = GMP_NUMB_MASK; cy = mpn_submul_1 (np - dn, dp - dn, dn, q); ASSERT (cy == n2); } else { udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv->inv32); if (dn > 2) { mp_limb_t cy, cy1; cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q); cy1 = n0 < cy; n0 = (n0 - cy) & GMP_NUMB_MASK; cy = n1 < cy1; n1 = (n1 - cy1) & GMP_NUMB_MASK; np[-2] = n0; if (UNLIKELY (cy != 0)) { n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1); qh -= (q == 0); q = (q - 1) & GMP_NUMB_MASK; } } else np[-2] = n0; np[-1] = n1; } qp[0] = q; } else { if (qn == 2) qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD)) qh = mpn_sbpi1_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv->inv32); else qh = mpn_dcpi1_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp); if (qn != dn) { if (qn > dn - qn) mpn_mul (tp, qp, qn, dp - dn, dn - qn); else mpn_mul (tp, dp - dn, dn - qn, qp, qn); cy = mpn_sub_n (np - dn, np - dn, tp, dn); if (qh != 0) cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn); while (cy != 0) { qh -= mpn_sub_1 (qp, qp, qn, 1); cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn); } } } qn = nn - dn - qn + 1; while (qn > dn) { qp -= dn; np -= dn; mpn_dcpi1_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp); qn -= dn; } /* Since we pretended we'd need an extra quotient limb before, we now have made sure the code above left just dn-1=qn quotient limbs to develop. Develop that plus a guard limb. */ qn--; qp -= qn; np -= dn; qsave = qp[qn]; mpn_dcpi1_divappr_q_n (qp, np - dn, dp - dn, dn, dinv, tp); MPN_COPY_INCR (qp, qp + 1, qn); qp[qn] = qsave; } else /* (qn < dn) */ { mp_ptr q2p; #if 0 /* not possible since we demand nn > dn */ if (qn == 0) { qh = mpn_cmp (np - dn, dp - dn, dn) >= 0; if (qh) mpn_sub_n (np - dn, np - dn, dp - dn, dn); TMP_FREE; return qh; } #endif qp -= qn; /* point at low limb of next quotient block */ np -= qn; /* point in the middle of partial remainder */ q2p = TMP_SALLOC_LIMBS (qn + 1); /* Should we at all check DC_DIVAPPR_Q_THRESHOLD here, or reply on callers not to be silly? */ if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD)) { qh = mpn_sbpi1_divappr_q (q2p, np - qn - 2, 2 * (qn + 1), dp - (qn + 1), qn + 1, dinv->inv32); } else { /* It is tempting to use qp for recursive scratch and put quotient in tp, but the recursive scratch needs one limb too many. 
*/ tp = TMP_SALLOC_LIMBS (qn + 1); qh = mpn_dcpi1_divappr_q_n (q2p, np - qn - 2, dp - (qn + 1), qn + 1, dinv, tp); } MPN_COPY (qp, q2p + 1, qn); } TMP_FREE; return qh; }
void mpz_powm_ui (mpz_ptr r, mpz_srcptr b, unsigned long int el, mpz_srcptr m) { mp_ptr xp, tp, qp, mp, bp; mp_size_t xn, tn, mn, bn; int m_zero_cnt; int c; mp_limb_t e; TMP_DECL; mp = PTR(m); mn = ABSIZ(m); if (mn == 0) DIVIDE_BY_ZERO; if (el == 0) { /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 depending on if MOD equals 1. */ SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1; PTR(r)[0] = 1; return; } TMP_MARK; /* Normalize m (i.e. make its most significant bit set) as required by division functions below. */ count_leading_zeros (m_zero_cnt, mp[mn - 1]); m_zero_cnt -= GMP_NAIL_BITS; if (m_zero_cnt != 0) { mp_ptr new_mp = TMP_ALLOC_LIMBS (mn); mpn_lshift (new_mp, mp, mn, m_zero_cnt); mp = new_mp; } bn = ABSIZ(b); bp = PTR(b); if (bn > mn) { /* Reduce possibly huge base. Use a function call to reduce, since we don't want the quotient allocation to live until function return. */ mp_ptr new_bp = TMP_ALLOC_LIMBS (mn); reduce (new_bp, bp, bn, mp, mn); bp = new_bp; bn = mn; /* Canonicalize the base, since we are potentially going to multiply with it quite a few times. */ MPN_NORMALIZE (bp, bn); } if (bn == 0) { SIZ(r) = 0; TMP_FREE; return; } tp = TMP_ALLOC_LIMBS (2 * mn + 1); xp = TMP_ALLOC_LIMBS (mn); qp = TMP_ALLOC_LIMBS (mn + 1); MPN_COPY (xp, bp, bn); xn = bn; e = el; count_leading_zeros (c, e); e = (e << c) << 1; /* shift the exp bits to the left, lose msb */ c = BITS_PER_MP_LIMB - 1 - c; /* Main loop. */ /* If m is already normalized (high bit of high limb set), and b is the same size, but a bigger value, and e==1, then there's no modular reductions done and we can end up with a result out of range at the end. */ if (c == 0) { if (xn == mn && mpn_cmp (xp, mp, mn) >= 0) mpn_sub_n (xp, xp, mp, mn); goto finishup; } while (c != 0) { mpn_sqr_n (tp, xp, xn); tn = 2 * xn; tn -= tp[tn - 1] == 0; if (tn < mn) { MPN_COPY (xp, tp, tn); xn = tn; } else { mpn_tdiv_qr (qp, xp, 0L, tp, tn, mp, mn); xn = mn; } if ((mp_limb_signed_t) e < 0) { mpn_mul (tp, xp, xn, bp, bn); tn = xn + bn; tn -= tp[tn - 1] == 0; if (tn < mn) { MPN_COPY (xp, tp, tn); xn = tn; } else { mpn_tdiv_qr (qp, xp, 0L, tp, tn, mp, mn); xn = mn; } } e <<= 1; c--; } finishup: /* We shifted m left m_zero_cnt steps. Adjust the result by reducing it with the original MOD. */ if (m_zero_cnt != 0) { mp_limb_t cy; cy = mpn_lshift (tp, xp, xn, m_zero_cnt); tp[xn] = cy; xn += cy != 0; if (xn < mn) { MPN_COPY (xp, tp, xn); } else { mpn_tdiv_qr (qp, xp, 0L, tp, xn, mp, mn); xn = mn; } mpn_rshift (xp, xp, xn, m_zero_cnt); } MPN_NORMALIZE (xp, xn); if ((el & 1) != 0 && SIZ(b) < 0 && xn != 0) { mp = PTR(m); /* want original, unnormalized m */ mpn_sub (xp, mp, mn, xp, xn); xn = mn; MPN_NORMALIZE (xp, xn); } MPZ_REALLOC (r, xn); SIZ (r) = xn; MPN_COPY (PTR(r), xp, xn); TMP_FREE; }
mp_limb_t mpn_dc_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_limb_t dinv) { mp_size_t qn; mp_limb_t qh, cy; mp_ptr tp; TMP_DECL; TMP_MARK; ASSERT (dn >= 6); /* to adhere to mpn_sb_div_qr's limits */ ASSERT (nn - dn >= 3); /* to adhere to mpn_sb_div_qr's limits */ ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT); tp = TMP_ALLOC_LIMBS (DC_DIVAPPR_Q_N_ITCH(dn)); qn = nn - dn; qp += qn; np += nn; dp += dn; if (qn > dn) { /* Reduce qn mod dn without division, optimizing small operations. */ do qn -= dn; while (qn > dn); qp -= qn; /* point at low limb of next quotient block */ np -= qn; /* point in the middle of partial remainder */ /* Perform the typically smaller block first. */ if (qn == 1) { mp_limb_t q, n2, n1, n0, d1, d0, d11, d01; /* Handle qh up front, for simplicity. */ qh = mpn_cmp (np - dn + 1, dp - dn, dn) >= 0; if (qh) ASSERT_NOCARRY (mpn_sub_n (np - dn + 1, np - dn + 1, dp - dn, dn)); /* A single iteration of schoolbook: One 3/2 division, followed by the bignum update and adjustment. */ n2 = np[0]; n1 = np[-1]; n0 = np[-2]; d1 = dp[-1]; d0 = dp[-2]; d01 = d0 + 1; d11 = d1 + (d01 < d0); ASSERT (n2 < d1 || (n2 == d1 && n1 <= d0)); if (UNLIKELY (n2 == d1) && n1 == d0) { q = GMP_NUMB_MASK; cy = mpn_submul_1 (np - dn, dp - dn, dn, q); ASSERT (cy == n2); } else { mpir_divrem32_preinv2 (q, n1, n0, n2, n1, n0, d11, d01, d1, d0, dinv); if (dn > 2) { mp_limb_t cy, cy1; cy = mpn_submul_1 (np - dn, dp - dn, dn - 2, q); cy1 = n0 < cy; n0 = (n0 - cy) & GMP_NUMB_MASK; cy = n1 < cy1; n1 = (n1 - cy1) & GMP_NUMB_MASK; np[-2] = n0; if (UNLIKELY (cy != 0)) { n1 += d1 + mpn_add_n (np - dn, np - dn, dp - dn, dn - 1); qh -= (q == 0); q = (q - 1) & GMP_NUMB_MASK; } } else np[-2] = n0; np[-1] = n1; } qp[0] = q; } else { /* Do a 2qn / qn division */ if (qn == 2) qh = mpn_divrem_2 (qp, 0L, np - 2, 4, dp - 2); /* FIXME: obsolete function. Use 5/3 division? */ else if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD)) qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv); else qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp); if (qn != dn) { if (qn > dn - qn) mpn_mul (tp, qp, qn, dp - dn, dn - qn); else mpn_mul (tp, dp - dn, dn - qn, qp, qn); cy = mpn_sub_n (np - dn, np - dn, tp, dn); if (qh != 0) cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn); while (cy != 0) { qh -= mpn_sub_1 (qp, qp, qn, 1); cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn); } } } qn = nn - dn - qn; do { qp -= dn; np -= dn; ASSERT_NOCARRY(mpn_dc_div_qr_n (qp, np - dn, dp - dn, dn, dinv, tp)); qn -= dn; } while (qn > 0); } else { qp -= qn; /* point at low limb of next quotient block */ np -= qn; /* point in the middle of partial remainder */ if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD)) qh = mpn_sb_div_qr (qp, np - qn, 2 * qn, dp - qn, qn, dinv); else qh = mpn_dc_div_qr_n (qp, np - qn, dp - qn, qn, dinv, tp); if (qn != dn) { if (qn > dn - qn) mpn_mul (tp, qp, qn, dp - dn, dn - qn); else mpn_mul (tp, dp - dn, dn - qn, qp, qn); cy = mpn_sub_n (np - dn, np - dn, tp, dn); if (qh != 0) cy += mpn_sub_n (np - dn + qn, np - dn + qn, dp - dn, dn - qn); while (cy != 0) { qh -= mpn_sub_1 (qp, qp, qn, 1); cy -= mpn_add_n (np - dn, np - dn, dp - dn, dn); } } } TMP_FREE; return qh; }
/* Computes {rp,MIN(rn,an+bn)} <- {ap,an}*{bp,bn} Mod(B^rn-1) * * The result is expected to be ZERO if and only if one of the operand * already is. Otherwise the class [0] Mod(B^rn-1) is represented by * B^rn-1. This should not be a problem if mulmod_bnm1 is used to * combine results and obtain a natural number when one knows in * advance that the final value is less than (B^rn-1). * Moreover it should not be a problem if mulmod_bnm1 is used to * compute the full product with an+bn <= rn, because this condition * implies (B^an-1)(B^bn-1) < (B^rn-1) . * * Requires 0 < bn <= an <= rn and an + bn > rn/2 * Scratch need: rn + (need for recursive call OR rn + 4). This gives * * S(n) <= rn + MAX (rn + 4, S(n/2)) <= 2rn + 4 */ void mpn_mulmod_bnm1 (mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn, mp_ptr tp) { ASSERT (0 < bn); ASSERT (bn <= an); ASSERT (an <= rn); if ((rn & 1) != 0 || BELOW_THRESHOLD (rn, MULMOD_BNM1_THRESHOLD)) { if (UNLIKELY (bn < rn)) { if (UNLIKELY (an + bn <= rn)) { mpn_mul (rp, ap, an, bp, bn); } else { mp_limb_t cy; mpn_mul (tp, ap, an, bp, bn); cy = mpn_add (rp, tp, rn, tp + rn, an + bn - rn); MPN_INCR_U (rp, rn, cy); } } else mpn_bc_mulmod_bnm1 (rp, ap, bp, rn, tp); } else { mp_size_t n; mp_limb_t cy; mp_limb_t hi; n = rn >> 1; /* We need at least an + bn >= n, to be able to fit one of the recursive products at rp. Requiring strict inequality makes the coded slightly simpler. If desired, we could avoid this restriction by initially halving rn as long as rn is even and an + bn <= rn/2. */ ASSERT (an + bn > n); /* Compute xm = a*b mod (B^n - 1), xp = a*b mod (B^n + 1) and crt together as x = -xp * B^n + (B^n + 1) * [ (xp + xm)/2 mod (B^n-1)] */ #define a0 ap #define a1 (ap + n) #define b0 bp #define b1 (bp + n) #define xp tp /* 2n + 2 */ /* am1 maybe in {xp, n} */ /* bm1 maybe in {xp + n, n} */ #define sp1 (tp + 2*n + 2) /* ap1 maybe in {sp1, n + 1} */ /* bp1 maybe in {sp1 + n + 1, n + 1} */ { mp_srcptr am1, bm1; mp_size_t anm, bnm; mp_ptr so; bm1 = b0; bnm = bn; if (LIKELY (an > n)) { am1 = xp; cy = mpn_add (xp, a0, n, a1, an - n); MPN_INCR_U (xp, n, cy); anm = n; so = xp + n; if (LIKELY (bn > n)) { bm1 = so; cy = mpn_add (so, b0, n, b1, bn - n); MPN_INCR_U (so, n, cy); bnm = n; so += n; } } else { so = xp; am1 = a0; anm = an; } mpn_mulmod_bnm1 (rp, n, am1, anm, bm1, bnm, so); } { int k; mp_srcptr ap1, bp1; mp_size_t anp, bnp; bp1 = b0; bnp = bn; if (LIKELY (an > n)) { ap1 = sp1; cy = mpn_sub (sp1, a0, n, a1, an - n); sp1[n] = 0; MPN_INCR_U (sp1, n + 1, cy); anp = n + ap1[n]; if (LIKELY (bn > n)) { bp1 = sp1 + n + 1; cy = mpn_sub (sp1 + n + 1, b0, n, b1, bn - n); sp1[2*n+1] = 0; MPN_INCR_U (sp1 + n + 1, n + 1, cy); bnp = n + bp1[n]; } } else { ap1 = a0; anp = an; } if (BELOW_THRESHOLD (n, MUL_FFT_MODF_THRESHOLD)) k=0; else { int mask; k = mpn_fft_best_k (n, 0); mask = (1<<k) - 1; while (n & mask) {k--; mask >>=1;}; } if (k >= FFT_FIRST_K) xp[n] = mpn_mul_fft (xp, n, ap1, anp, bp1, bnp, k); else if (UNLIKELY (bp1 == b0)) { ASSERT (anp + bnp <= 2*n+1); ASSERT (anp + bnp > n); ASSERT (anp >= bnp); mpn_mul (xp, ap1, anp, bp1, bnp); anp = anp + bnp - n; ASSERT (anp <= n || xp[2*n]==0); anp-= anp > n; cy = mpn_sub (xp, xp, n, xp + n, anp); xp[n] = 0; MPN_INCR_U (xp, n+1, cy); } else mpn_bc_mulmod_bnp1 (xp, ap1, bp1, n, xp); } /* Here the CRT recomposition begins. xm <- (xp + xm)/2 = (xp + xm)B^n/2 mod (B^n-1) Division by 2 is a bitwise rotation. Assumes xp normalised mod (B^n+1). 
The residue class [0] is represented by [B^n-1]; except when both input are ZERO. */ #if HAVE_NATIVE_mpn_rsh1add_n || HAVE_NATIVE_mpn_rsh1add_nc #if HAVE_NATIVE_mpn_rsh1add_nc cy = mpn_rsh1add_nc(rp, rp, xp, n, xp[n]); /* B^n = 1 */ hi = cy << (GMP_NUMB_BITS - 1); cy = 0; /* next update of rp[n-1] will set cy = 1 only if rp[n-1]+=hi overflows, i.e. a further increment will not overflow again. */ #else /* ! _nc */ cy = xp[n] + mpn_rsh1add_n(rp, rp, xp, n); /* B^n = 1 */ hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */ cy >>= 1; /* cy = 1 only if xp[n] = 1 i.e. {xp,n} = ZERO, this implies that the rsh1add was a simple rshift: the top bit is 0. cy=1 => hi=0. */ #endif #if GMP_NAIL_BITS == 0 add_ssaaaa(cy, rp[n-1], cy, rp[n-1], 0, hi); #else cy += (hi & rp[n-1]) >> (GMP_NUMB_BITS-1); rp[n-1] ^= hi; #endif #else /* ! HAVE_NATIVE_mpn_rsh1add_n */ #if HAVE_NATIVE_mpn_add_nc cy = mpn_add_nc(rp, rp, xp, n, xp[n]); #else /* ! _nc */ cy = xp[n] + mpn_add_n(rp, rp, xp, n); /* xp[n] == 1 implies {xp,n} == ZERO */ #endif cy += (rp[0]&1); mpn_rshift(rp, rp, n, 1); ASSERT (cy <= 2); hi = (cy<<(GMP_NUMB_BITS-1))&GMP_NUMB_MASK; /* (cy&1) << ... */ cy >>= 1; /* We can have cy != 0 only if hi = 0... */ ASSERT ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0); rp[n-1] |= hi; /* ... rp[n-1] + cy can not overflow, the following INCR is correct. */ #endif ASSERT (cy <= 1); /* Next increment can not overflow, read the previous comments about cy. */ ASSERT ((cy == 0) || ((rp[n-1] & GMP_NUMB_HIGHBIT) == 0)); MPN_INCR_U(rp, n, cy); /* Compute the highest half: ([(xp + xm)/2 mod (B^n-1)] - xp ) * B^n */ if (UNLIKELY (an + bn < rn)) { /* Note that in this case, the only way the result can equal zero mod B^{rn} - 1 is if one of the inputs is zero, and then the output of both the recursive calls and this CRT reconstruction is zero, not B^{rn} - 1. Which is good, since the latter representation doesn't fit in the output area.*/ cy = mpn_sub_n (rp + n, rp, xp, an + bn - n); /* FIXME: This subtraction of the high parts is not really necessary, we do it to get the carry out, and for sanity checking. */ cy = xp[n] + mpn_sub_nc (xp + an + bn - n, rp + an + bn - n, xp + an + bn - n, rn - (an + bn), cy); ASSERT (an + bn == rn - 1 || mpn_zero_p (xp + an + bn - n + 1, rn - 1 - (an + bn))); cy = mpn_sub_1 (rp, rp, an + bn, cy); ASSERT (cy == (xp + an + bn - n)[0]); } else { cy = xp[n] + mpn_sub_n (rp + n, rp, xp, n); /* cy = 1 only if {xp,n+1} is not ZERO, i.e. {rp,n} is not ZERO. DECR will affect _at most_ the lowest n limbs. */ MPN_DECR_U (rp, 2*n, cy); } #undef a0 #undef a1 #undef b0 #undef b1 #undef xp #undef sp1 } }
/* Exact product. The number a is assumed to have enough allocated memory, where the trailing bits are regarded as being part of the input numbers (no reallocation is attempted and no check is performed as MPFR_TMP_INIT could have been used). The arguments b and c may actually be UBF numbers (mpfr_srcptr can be seen a bit like void *, but is stronger). This function does not change the flags, except in case of NaN. */ void mpfr_ubf_mul_exact (mpfr_ubf_ptr a, mpfr_srcptr b, mpfr_srcptr c) { MPFR_LOG_FUNC (("b[%Pu]=%.*Rg c[%Pu]=%.*Rg", mpfr_get_prec (b), mpfr_log_prec, b, mpfr_get_prec (c), mpfr_log_prec, c), ("a[%Pu]=%.*Rg", mpfr_get_prec (a), mpfr_log_prec, a)); MPFR_ASSERTD ((mpfr_ptr) a != b); MPFR_ASSERTD ((mpfr_ptr) a != c); MPFR_SIGN (a) = MPFR_MULT_SIGN (MPFR_SIGN (b), MPFR_SIGN (c)); if (MPFR_ARE_SINGULAR (b, c)) { if (MPFR_IS_NAN (b) || MPFR_IS_NAN (c)) MPFR_SET_NAN (a); else if (MPFR_IS_INF (b)) { if (MPFR_NOTZERO (c)) MPFR_SET_INF (a); else MPFR_SET_NAN (a); } else if (MPFR_IS_INF (c)) { if (!MPFR_IS_ZERO (b)) MPFR_SET_INF (a); else MPFR_SET_NAN (a); } else { MPFR_ASSERTD (MPFR_IS_ZERO(b) || MPFR_IS_ZERO(c)); MPFR_SET_ZERO (a); } } else { mpfr_exp_t e; mp_size_t bn, cn; mpfr_limb_ptr ap; mp_limb_t u, v; int m; /* Note about the code below: For the choice of the precision of * the result a, one could choose PREC(b) + PREC(c), instead of * taking whole limbs into account, but in most cases where one * would gain one limb, one would need to copy the significand * instead of a no-op (see the mul.c code). * But in the case MPFR_LIMB_MSB (u) == 0, if the result fits in * an-1 limbs, one could actually do * mpn_rshift (ap, ap, k, GMP_NUMB_BITS - 1) * instead of * mpn_lshift (ap, ap, k, 1) * to gain one limb (and reduce the precision), replacing a shift * by another one. Would this be interesting? */ bn = MPFR_LIMB_SIZE (b); cn = MPFR_LIMB_SIZE (c); ap = MPFR_MANT (a); u = (bn >= cn) ? mpn_mul (ap, MPFR_MANT (b), bn, MPFR_MANT (c), cn) : mpn_mul (ap, MPFR_MANT (c), cn, MPFR_MANT (b), bn); if (MPFR_UNLIKELY (MPFR_LIMB_MSB (u) == 0)) { m = 1; MPFR_DBGRES (v = mpn_lshift (ap, ap, bn + cn, 1)); MPFR_ASSERTD (v == 0); } else m = 0; if (! MPFR_IS_UBF (b) && ! MPFR_IS_UBF (c) && (e = MPFR_GET_EXP (b) + MPFR_GET_EXP (c) - m, MPFR_EXP_IN_RANGE (e))) { MPFR_SET_EXP (a, e); } else { mpz_t be, ce; mpz_init (MPFR_ZEXP (a)); /* This may involve copies of mpz_t, but exponents should not be very large integers anyway. */ mpfr_get_zexp (be, b); mpfr_get_zexp (ce, c); mpz_add (MPFR_ZEXP (a), be, ce); mpz_clear (be); mpz_clear (ce); mpz_sub_ui (MPFR_ZEXP (a), MPFR_ZEXP (a), m); MPFR_SET_UBF (a); } } }
/* Check schoolboy division routine. */ void check_sb_div_q (void) { mp_limb_t np[2*MAX_LIMBS]; mp_limb_t np2[2*MAX_LIMBS]; mp_limb_t rp[2*MAX_LIMBS+1]; mp_limb_t dp[MAX_LIMBS]; mp_limb_t qp[2*MAX_LIMBS]; mp_limb_t dip, cy; mp_size_t nn, rn, dn, qn; gmp_randstate_t rands; int i, j, s; gmp_randinit_default(rands); for (i = 0; i < ITERS; i++) { dn = (random() % (MAX_LIMBS - 2)) + 3; nn = (random() % MAX_LIMBS) + dn; mpn_rrandom (np, rands, nn); mpn_rrandom (dp, rands, dn); dp[dn-1] |= GMP_LIMB_HIGHBIT; MPN_COPY(np2, np, nn); mpir_invert_pi2(dip, dp[dn - 1], dp[dn - 2]); qn = nn - dn + 1; qp[qn - 1] = mpn_sb_div_q(qp, np, nn, dp, dn, dip); MPN_NORMALIZE(qp, qn); if (qn) { if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn); else mpn_mul(rp, dp, dn, qp, qn); rn = dn + qn; MPN_NORMALIZE(rp, rn); if (rn > nn) { printf("failed: q*d has too many limbs\n"); abort(); } if (mpn_cmp(rp, np2, nn) > 0) { printf("failed: remainder negative\n"); abort(); } mpn_sub(rp, np2, nn, rp, rn); rn = nn; MPN_NORMALIZE(rp, rn); } else { rn = nn; MPN_COPY(rp, np, nn); } s = (rn < dn) ? -1 : (rn > dn) ? 1 : mpn_cmp(rp, dp, dn); if (s >= 0) { printf ("failed:\n"); printf ("nn = %lu, dn = %lu, qn = %lu, rn = %lu\n\n", nn, dn, qn, rn); gmp_printf (" np: %Nx\n\n", np2, nn); gmp_printf (" dp: %Nx\n\n", dp, dn); gmp_printf (" qp: %Nx\n\n", qp, qn); gmp_printf (" rp: %Nx\n\n", rp, rn); abort (); } } gmp_randclear(rands); }
void mpz_mul (mpz_ptr w, mpz_srcptr u, mpz_srcptr v) { mp_size_t usize; mp_size_t vsize; mp_size_t wsize; mp_size_t sign_product; mp_ptr up, vp; mp_ptr wp; mp_ptr free_me; size_t free_me_size; mp_limb_t cy_limb; TMP_DECL; usize = SIZ (u); vsize = SIZ (v); sign_product = usize ^ vsize; usize = ABS (usize); vsize = ABS (vsize); if (usize < vsize) { MPZ_SRCPTR_SWAP (u, v); MP_SIZE_T_SWAP (usize, vsize); } if (vsize == 0) { SIZ (w) = 0; return; } #if HAVE_NATIVE_mpn_mul_2 if (vsize <= 2) { wp = MPZ_REALLOC (w, usize+vsize); if (vsize == 1) cy_limb = mpn_mul_1 (wp, PTR (u), usize, PTR (v)[0]); else { cy_limb = mpn_mul_2 (wp, PTR (u), usize, PTR (v)); usize++; } wp[usize] = cy_limb; usize += (cy_limb != 0); SIZ (w) = (sign_product >= 0 ? usize : -usize); return; } #else if (vsize == 1) { wp = MPZ_REALLOC (w, usize+1); cy_limb = mpn_mul_1 (wp, PTR (u), usize, PTR (v)[0]); wp[usize] = cy_limb; usize += (cy_limb != 0); SIZ (w) = (sign_product >= 0 ? usize : -usize); return; } #endif TMP_MARK; free_me = NULL; up = PTR (u); vp = PTR (v); wp = PTR (w); /* Ensure W has space enough to store the result. */ wsize = usize + vsize; if (ALLOC (w) < wsize) { if (wp == up || wp == vp) { free_me = wp; free_me_size = ALLOC (w); } else (*__gmp_free_func) (wp, (size_t) ALLOC (w) * GMP_LIMB_BYTES); ALLOC (w) = wsize; wp = __GMP_ALLOCATE_FUNC_LIMBS (wsize); PTR (w) = wp; } else { /* Make U and V not overlap with W. */ if (wp == up) { /* W and U are identical. Allocate temporary space for U. */ up = TMP_ALLOC_LIMBS (usize); /* Is V identical too? Keep it identical with U. */ if (wp == vp) vp = up; /* Copy to the temporary space. */ MPN_COPY (up, wp, usize); } else if (wp == vp) { /* W and V are identical. Allocate temporary space for V. */ vp = TMP_ALLOC_LIMBS (vsize); /* Copy to the temporary space. */ MPN_COPY (vp, wp, vsize); } } if (up == vp) { mpn_sqr (wp, up, usize); cy_limb = wp[wsize - 1]; } else { cy_limb = mpn_mul (wp, up, usize, vp, vsize); } wsize -= cy_limb == 0; SIZ (w) = sign_product < 0 ? -wsize : wsize; if (free_me != NULL) (*__gmp_free_func) (free_me, free_me_size * GMP_LIMB_BYTES); TMP_FREE; }