/* Multiply M by M1 from the right.  Since the M1 elements fit in
   GMP_NUMB_BITS - 1 bits, M grows by at most one limb.  Needs
   temporary space M->n */
static void
ngcd_matrix_mul_1 (struct ngcd_matrix *M, const struct ngcd_matrix1 *M1)
{
  unsigned row;
  mp_limb_t grow;

  for (row = 0, grow = 0; row < 2; row++)
    {
      mp_limb_t c0, c1;

      /* Compute (u, u') <-- (r00 u + r10 u', r01 u + r11 u') as

	 t   = u
	 u  *= r00
	 u  += r10 * u'
	 u' *= r11
	 u' += r01 * t
      */
      MPN_COPY (M->tp, M->p[row][0], M->n);
      c0 =  mpn_mul_1    (M->p[row][0], M->p[row][0], M->n, M1->u[0][0]);
      c0 += mpn_addmul_1 (M->p[row][0], M->p[row][1], M->n, M1->u[1][0]);
      M->p[row][0][M->n] = c0;

      c1 =  mpn_mul_1    (M->p[row][1], M->p[row][1], M->n, M1->u[1][1]);
      c1 += mpn_addmul_1 (M->p[row][1], M->tp, M->n, M1->u[0][1]);
      M->p[row][1][M->n] = c1;

      grow |= (c0 | c1);
    }
  M->n += (grow != 0);
  ASSERT (M->n < M->alloc);
}
/* Assumes poly1 and poly2 are not length 0 and 0 < trunc <= len1 + len2 - 1 */
void
_nmod_poly_mullow_classical(mp_ptr res, mp_srcptr poly1, slong len1,
                            mp_srcptr poly2, slong len2, slong trunc,
                            nmod_t mod)
{
    if (len1 == 1 || trunc == 1) /* Special case if the length of output is 1 */
    {
        res[0] = n_mulmod2_preinv(poly1[0], poly2[0], mod.n, mod.ninv);
    }
    else /* Ordinary case */
    {
        slong i;
        slong bits = FLINT_BITS - (slong) mod.norm;
        slong log_len = FLINT_BIT_COUNT(len2);

        if (2 * bits + log_len <= FLINT_BITS)
        {
            /* Set res[i] = poly1[i]*poly2[0] */
            mpn_mul_1(res, poly1, FLINT_MIN(len1, trunc), poly2[0]);

            if (len2 != 1)
            {
                /* Set res[i+len1-1] = in1[len1-1]*in2[i] */
                if (trunc > len1)
                    mpn_mul_1(res + len1, poly2 + 1, trunc - len1,
                              poly1[len1 - 1]);

                /* out[i+j] += in1[i]*in2[j] */
                for (i = 0; i < FLINT_MIN(len1, trunc) - 1; i++)
                    mpn_addmul_1(res + i + 1, poly2 + 1,
                                 FLINT_MIN(len2, trunc - i) - 1, poly1[i]);
            }

            _nmod_vec_reduce(res, res, trunc, mod);
        }
        else
        {
            /* Set res[i] = poly1[i]*poly2[0] */
            _nmod_vec_scalar_mul_nmod(res, poly1, FLINT_MIN(len1, trunc),
                                      poly2[0], mod);

            if (len2 == 1)
                return;

            /* Set res[i+len1-1] = in1[len1-1]*in2[i] */
            if (trunc > len1)
                _nmod_vec_scalar_mul_nmod(res + len1, poly2 + 1, trunc - len1,
                                          poly1[len1 - 1], mod);

            /* out[i+j] += in1[i]*in2[j] */
            for (i = 0; i < FLINT_MIN(len1, trunc) - 1; i++)
                _nmod_vec_scalar_addmul_nmod(res + i + 1, poly2 + 1,
                                             FLINT_MIN(len2, trunc - i) - 1,
                                             poly1[i], mod);
        }
    }
}
void
mpn_mullo_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  mp_limb_t h;

  h = up[0] * vp[n - 1];

  if (n != 1)
    {
      mp_size_t i;
      mp_limb_t v0;

      v0 = *vp++;
      h += up[n - 1] * v0 + mpn_mul_1 (rp, up, n - 1, v0);
      rp++;

      for (i = n - 2; i > 0; i--)
        {
          v0 = *vp++;
          h += up[i] * v0 + mpn_addmul_1 (rp, up, i, v0);
          rp++;
        }
    }

  rp[0] = h;
}
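A minimal check sketch for the function above (an editorial addition, assuming this mpn_mullo_basecase definition is compiled into a test program against GMP or MPIR; check_mullo is a hypothetical helper): the low n limbs it produces must match the low half of the full 2n-limb product.

#include <assert.h>
#include <gmp.h>

void mpn_mullo_basecase (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);

void
check_mullo (mp_size_t n)
{
  mp_limb_t u[32], v[32], lo[32], full[64];

  assert (n >= 1 && n <= 32);
  mpn_random (u, n);
  mpn_random (v, n);

  mpn_mullo_basecase (lo, u, v, n);     /* low n limbs only */
  mpn_mul_n (full, u, v, n);            /* full 2n-limb product */

  assert (mpn_cmp (lo, full, n) == 0);  /* low halves must agree */
}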
void
mpz_mul_si (mpz_ptr prod, mpz_srcptr mult, long int small_mult)
{
  mp_size_t size = mult->_mp_size;
  mp_size_t sign_product = size;
  mp_limb_t cy;
  mp_size_t prod_size;
  mp_ptr prod_ptr;

  if (size == 0 || small_mult == 0)
    {
      prod->_mp_size = 0;
      return;
    }

  size = ABS (size);
  prod_size = size + 1;

  if (prod->_mp_alloc < prod_size)
    _mpz_realloc (prod, prod_size);

  prod_ptr = prod->_mp_d;

  cy = mpn_mul_1 (prod_ptr, mult->_mp_d, size, (mp_limb_t) ABS (small_mult));
  if (cy != 0)
    {
      prod_ptr[size] = cy;
      size++;
    }

  prod->_mp_size = ((sign_product < 0) ^ (small_mult < 0)) ? -size : size;
}
/* (rp, 2n) = (xp, n)*(yp, n) / B^n */
inline static void
mpn_mulshort_n_basecase (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
  mp_size_t i, k;
#if GMP_NAIL_BITS == 0
  mp_limb_t t1, t2, t3;
#endif

  ASSERT (n >= 3);  /* this restriction doesn't make a lot of sense in general */
  ASSERT_MPN (xp, n);
  ASSERT_MPN (yp, n);
  ASSERT (!MPN_OVERLAP_P (rp, 2 * n, xp, n));
  ASSERT (!MPN_OVERLAP_P (rp, 2 * n, yp, n));

  k = n - 2;  /* so want short product sum_(i + j >= k) x[i]y[j]B^(i + j) */

#if GMP_NAIL_BITS != 0
  rp[n] = mpn_mul_1 (rp + k, xp + k, 2, yp[0]);
#else
  umul_ppmm (t1, rp[k], xp[k], yp[0]);
  umul_ppmm (t3, t2, xp[k + 1], yp[0]);
  add_ssaaaa (rp[n], rp[k + 1], t3, t2, 0, t1);
#endif

  for (i = 1; i <= n - 2; i++)
    rp[n + i] = mpn_addmul_1 (rp + k, xp + k - i, 2 + i, yp[i]);

  rp[n + n - 1] = mpn_addmul_1 (rp + n - 1, xp, n, yp[n - 1]);

  return;
}
mp_limb_t
mpn_addadd_n (mp_ptr t, mp_srcptr x, mp_srcptr y, mp_srcptr z, mp_size_t n)
{
  mp_limb_t ret;
  mp_srcptr a = x, b = y, c = z;

  ASSERT (n > 0);
  ASSERT_MPN (x, n);
  ASSERT_MPN (y, n);
  ASSERT_MPN (z, n);
  //ASSERT_SPACE(t,n);
  ASSERT (MPN_SAME_OR_SEPARATE_P (t, x, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (t, y, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (t, z, n));

  if (t == x)
    {
      if (t == y)
        {
          if (t == z)
            {
              /* all three operands alias the destination, so t = 3*t */
#ifdef HAVE_NATIVE_mpn_addlsh1_n
              return mpn_addlsh1_n (t, x, y, n);
#else
              return mpn_mul_1 (t, x, n, 3);
#endif
            }
        }
      else
        MP_SRCPTR_SWAP (b, c);
    }
  else
    {
      MP_SRCPTR_SWAP (a, c);
      if (t == y)
        MP_SRCPTR_SWAP (a, b);
    }

  ret = mpn_add_n (t, a, b, n);
  return ret + mpn_add_n (t, t, c, n);
}
void
check_one (const char *desc, mpf_ptr got, mpf_srcptr u, mpir_ui v)
{
  mp_size_t usize, usign;
  mp_ptr wp;
  mpf_t want;

  MPF_CHECK_FORMAT (got);

  /* this code not nailified yet */
  ASSERT_ALWAYS (BITS_PER_UI <= GMP_NUMB_BITS);
  usign = SIZ (u);
  usize = ABS (usign);
  wp = refmpn_malloc_limbs (usize + 1);
  wp[usize] = mpn_mul_1 (wp, PTR (u), usize, (mp_limb_t) v);

  PTR (want) = wp;
  SIZ (want) = (usign >= 0 ? usize + 1 : -(usize + 1));
  EXP (want) = EXP (u) + 1;
  refmpf_normalize (want);

  if (! refmpf_validate ("mpf_mul_ui", got, want))
    {
      mp_trace_base = -16;
      printf (" %s\n", desc);
      mpf_trace (" u", u);
      printf (" v %ld 0x%lX\n", v, v);
      abort ();
    }

  free (wp);
}
/* puts in {rp, n} the low part of {np, n} times {mp, n}, i.e. equivalent to:

   mp_ptr tp;
   TMP_DECL(marker);
   TMP_MARK(marker);
   tp = TMP_ALLOC_LIMBS (2 * n);
   mpn_mul_n (tp, np, mp, n);
   MPN_COPY (rp, tp, n);
   TMP_FREE(marker);
*/
void
ecm_mul_lo_basecase (mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n)
{
  mpn_mul_1 (rp, np, n, mp[0]);
  for (; --n;)
    mpn_addmul_1 (++rp, np, n, (++mp)[0]);
}
long
_fmpr_mul_mpn(fmpr_t z,
    mp_srcptr xman, mp_size_t xn, const fmpz_t xexp,
    mp_srcptr yman, mp_size_t yn, const fmpz_t yexp,
    int negative, long prec, fmpr_rnd_t rnd)
{
    long zn, alloc, ret, shift;
    mp_limb_t tmp_stack[MUL_STACK_ALLOC];
    mp_ptr tmp;

    zn = xn + yn;
    alloc = zn;

    MUL_TMP_ALLOC

    if (yn == 1)
    {
        mp_limb_t cy = mpn_mul_1(tmp, xman, xn, yman[0]);
        tmp[zn - 1] = cy;
        zn = zn - (cy == 0);
    }
    else
    {
        mpn_mul(tmp, xman, xn, yman, yn);
        zn = zn - (tmp[zn - 1] == 0);
    }

    ret = _fmpr_set_round_mpn(&shift, fmpr_manref(z), tmp, zn,
                              negative, prec, rnd);
    fmpz_add2_fmpz_si_inline(fmpr_expref(z), xexp, yexp, shift);

    MUL_TMP_FREE

    return ret;
}
void
mpn_mullow_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  mp_size_t i;

  mpn_mul_1 (rp, up, n, vp[0]);
  for (i = 1; i < n; i++)
    mpn_addmul_1 (rp + i, up, n - i, vp[i]);
}
/* Put in rp[0..n] the n+1 low limbs of {up, n} * {vp, n}.
   Assume 2n limbs are allocated at rp. */
static void
mpfr_mullow_n_basecase (mpfr_limb_ptr rp, mpfr_limb_srcptr up,
                        mpfr_limb_srcptr vp, mp_size_t n)
{
  mp_size_t i;

  rp[n] = mpn_mul_1 (rp, up, n, vp[0]);
  for (i = 1 ; i < n ; i++)
    mpn_addmul_1 (rp + i, up, n - i + 1, vp[i]);
}
void
mpz_lcm (mpz_ptr r, mpz_srcptr u, mpz_srcptr v)
{
  mpz_t g;
  mp_size_t usize, vsize;
  TMP_DECL;

  usize = SIZ (u);
  vsize = SIZ (v);
  if (usize == 0 || vsize == 0)
    {
      SIZ (r) = 0;
      return;
    }
  usize = ABS (usize);
  vsize = ABS (vsize);

  if (vsize == 1 || usize == 1)
    {
      mp_limb_t vl, gl, c;
      mp_srcptr up;
      mp_ptr rp;

      if (usize == 1)
        {
          usize = vsize;
          MPZ_SRCPTR_SWAP (u, v);
        }
      MPZ_REALLOC (r, usize + 1);

      up = PTR (u);
      vl = PTR (v)[0];
      gl = mpn_gcd_1 (up, usize, vl);
      vl /= gl;

      rp = PTR (r);
      c = mpn_mul_1 (rp, up, usize, vl);
      rp[usize] = c;
      usize += (c != 0);
      SIZ (r) = usize;
      return;
    }

  TMP_MARK;
  MPZ_TMP_INIT (g, usize);  /* v != 0 implies |gcd(u,v)| <= |u| */

  mpz_gcd (g, u, v);
  mpz_divexact (g, u, g);
  mpz_mul (r, g, v);

  SIZ (r) = ABS (SIZ (r));  /* result always positive */

  TMP_FREE;
}
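A minimal usage sketch via the public GMP API (an editorial addition, not part of the source file): lcm(12, 18) = 36.

#include <stdio.h>
#include <gmp.h>

int
main (void)
{
  mpz_t a, b, l;

  mpz_init_set_ui (a, 12);
  mpz_init_set_ui (b, 18);
  mpz_init (l);

  mpz_lcm (l, a, b);
  gmp_printf ("lcm(%Zd, %Zd) = %Zd\n", a, b, l);  /* prints 36 */

  mpz_clear (a);
  mpz_clear (b);
  mpz_clear (l);
  return 0;
}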
static void fp_mul_si(element_ptr e, element_ptr a, signed long int op) {
  fp_field_data_ptr p = e->field->data;
  size_t t = p->limbs;
  mp_limb_t *tmp = _alloca((t + 1) * sizeof(mp_limb_t));
  mp_limb_t qp[2];

  tmp[t] = mpn_mul_1(tmp, a->data, t, labs(op));
  mpn_tdiv_qr(qp, e->data, 0, tmp, t + 1, p->primelimbs, t);
  if (op < 0) {
    fp_neg(e, e);
  }
}
/* Compute r such that r^2 * y = 1 (mod 2^{b+1}).
   Return non-zero if such an integer r exists.

   Iterates
     r' <-- (3r - r^3 y) / 2
   using Hensel lifting.  Since we divide by two, the Hensel lifting is
   somewhat degenerate.  Therefore, we lift from 2^b to 2^{b+1}-1.

   FIXME:
     (1) Simplify to do precision book-keeping in limbs rather than bits.
     (2) Rewrite iteration as r' <-- r - r (r^2 y - 1) / 2 and take advantage
         of the zero low part of r^2 y - 1.
     (3) Use wrap-around trick.
     (4) Use a small table to get the starting value.
*/
int
mpn_bsqrtinv (mp_ptr rp, mp_srcptr yp, mp_bitcnt_t bnb, mp_ptr tp)
{
  mp_ptr tp2, tp3;
  mp_limb_t k;
  mp_size_t bn, order[GMP_LIMB_BITS + 1];
  int i, d;

  ASSERT (bnb > 0);

  bn = 1 + bnb / GMP_LIMB_BITS;

  tp2 = tp + bn;
  tp3 = tp + 2 * bn;
  k = 3;

  rp[0] = 1;
  if (bnb == 1)
    {
      if ((yp[0] & 3) != 1)
        return 0;
    }
  else
    {
      if ((yp[0] & 7) != 1)
        return 0;

      d = 0;
      for (; bnb != 2; bnb = (bnb + 2) >> 1)
        order[d++] = bnb;

      for (i = d - 1; i >= 0; i--)
        {
          bnb = order[i];
          bn = 1 + bnb / GMP_LIMB_BITS;

          mpn_mul_1 (tp, rp, bn, k);

          mpn_powlo (tp2, rp, &k, 1, bn, tp3);
          mpn_mullo_n (rp, yp, tp2, bn);

#if HAVE_NATIVE_mpn_rsh1sub_n
          mpn_rsh1sub_n (rp, tp, rp, bn);
#else
          mpn_sub_n (tp2, tp, rp, bn);
          mpn_rshift (rp, tp2, bn, 1);
#endif
        }
    }
  return 1;
}
void
ecc_modp_mul_1 (const struct ecc_curve *ecc, mp_limb_t *rp,
                const mp_limb_t *ap, mp_limb_t b)
{
  mp_limb_t hi;

  assert (b <= 0xffffffff);
  hi = mpn_mul_1 (rp, ap, ecc->size, b);
  hi = mpn_addmul_1 (rp, ecc->Bmodp, ecc->size, hi);
  assert (hi <= 1);
  hi = cnd_add_n (hi, rp, ecc->Bmodp, ecc->size);
  /* Sufficient if b < B^size / p */
  assert (hi == 0);
}
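Why the carry folding above is valid (an editorial note, assuming ecc->Bmodp holds B^size mod p with B = 2^GMP_NUMB_BITS, which is what the folding step requires): after the first mpn_mul_1 the exact product is hi*B^size + {rp, size}, and since B^size ≡ Bmodp (mod p), replacing the term hi*B^size by hi*Bmodp via mpn_addmul_1 preserves the residue class; the final cnd_add_n folds in the at most one remaining carry in the same way.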
void
mpn_mullo_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  mp_size_t i;

  mpn_mul_1 (rp, up, n, vp[0]);

  for (i = n - 1; i > 0; i--)
    {
      vp++;
      rp++;
      mpn_addmul_1 (rp, up, i, vp[0]);
    }
}
mp_limb_t
mpn_submul_1 (mp_ptr rp, mp_srcptr s1p, mp_size_t n, mp_limb_t s2d)
{
  mp_ptr tp;
  mp_limb_t cy;
  TMP_DECL;

  TMP_MARK;
  tp = TMP_ALLOC_LIMBS (n);

  cy = mpn_mul_1 (tp, s1p, n, s2d);
  cy += mpn_sub_n (rp, rp, tp, n);

  TMP_FREE;
  return cy;
}
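A minimal usage sketch through the public GMP interface (an editorial addition; the constants and the helper name submul_1_example are illustrative): subtracting 3*2 from the two-limb value B + 7 leaves B + 1 with no borrow.

#include <assert.h>
#include <gmp.h>

void
submul_1_example (void)
{
  mp_limb_t r[2] = { 7, 1 };   /* the value 1*B + 7 */
  mp_limb_t s[2] = { 3, 0 };   /* the value 3 */

  mp_limb_t borrow = mpn_submul_1 (r, s, 2, 2);  /* r -= 3 * 2 */
  assert (borrow == 0 && r[0] == 1 && r[1] == 1);
}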
/* Define our own squaring function, which uses mpn_sqr_basecase for its
   allowed sizes, but its own code for larger sizes.  */
static void
mpn_local_sqr (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr tp)
{
  mp_size_t i;

  ASSERT (n >= 1);
  ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));

  if (BELOW_THRESHOLD (n, SQR_BASECASE_LIM))
    {
      mpn_sqr_basecase (rp, up, n);
      return;
    }

  {
    mp_limb_t ul, lpl;
    ul = up[0];
    umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
    rp[0] = lpl >> GMP_NAIL_BITS;
  }
  if (n > 1)
    {
      mp_limb_t cy;

      cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
      tp[n - 1] = cy;
      for (i = 2; i < n; i++)
        {
          mp_limb_t cy;
          cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
          tp[n + i - 2] = cy;
        }

      MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);

      {
        mp_limb_t cy;
#if HAVE_NATIVE_mpn_addlsh1_n
        cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
#else
        cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
        cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
#endif
        rp[2 * n - 1] += cy;
      }
    }
}
void
_nmod_vec_scalar_mul_nmod(mp_ptr res, mp_srcptr vec, slong len,
                          mp_limb_t c, nmod_t mod)
{
    if (mod.norm >= FLINT_BITS/2)  /* products will fit in a limb */
    {
        mpn_mul_1(res, vec, len, c);
        _nmod_vec_reduce(res, res, len, mod);
    }
    else  /* products may take two limbs */
    {
        slong i;

        for (i = 0; i < len; i++)
        {
            mp_limb_t hi, lo;

            umul_ppmm(hi, lo, vec[i], c);
            NMOD_RED2(res[i], hi, lo, mod);  /* hi already reduced mod n */
        }
    }
}
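A short worked bound for the single-limb branch above (editorial reasoning, assuming the scalar c is reduced modulo mod.n, as the _nmod_vec interface expects): mod.norm is the number of leading zero bits of mod.n, so mod.n < 2^(FLINT_BITS - mod.norm); when mod.norm >= FLINT_BITS/2, every product vec[i]*c < mod.n^2 < 2^(2*(FLINT_BITS - mod.norm)) <= 2^FLINT_BITS, so it fits in a single limb and one mpn_mul_1 pass followed by _nmod_vec_reduce suffices.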
void
impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
{
  mp_size_t i;
  mp_limb_t cy_limb;
  mp_limb_t v_limb;

  /* Multiply by the first limb in V separately, as the result can be
     stored (not added) to PROD.  We also avoid a loop for zeroing.  */
  v_limb = vp[0];
  if (v_limb <= 1)
    {
      if (v_limb == 1)
        MPN_COPY (prodp, up, size);
      else
        MPN_ZERO (prodp, size);
      cy_limb = 0;
    }
  else
    cy_limb = mpn_mul_1 (prodp, up, size, v_limb);

  prodp[size] = cy_limb;
  prodp++;

  /* For each iteration in the outer loop, multiply one limb from
     U with one limb from V, and add it to PROD.  */
  for (i = 1; i < size; i++)
    {
      v_limb = vp[i];
      if (v_limb <= 1)
        {
          cy_limb = 0;
          if (v_limb == 1)
            cy_limb = mpn_add_n (prodp, prodp, up, size);
        }
      else
        cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);

      prodp[size] = cy_limb;
      prodp++;
    }
}
static void
print_mpn_fp (const mp_limb_t *x, unsigned int dp, unsigned int base)
{
  unsigned int i;
  mp1 tx;

  memcpy (tx, x, sizeof (mp1));
  if (base == 16)
    fputs ("0x", stdout);
  assert (x[SZ - 1] < base);
  fputc (hexdig[x[SZ - 1]], stdout);
  fputc ('.', stdout);
  for (i = 0; i < dp; i++)
    {
      tx[SZ - 1] = 0;
      mpn_mul_1 (tx, tx, SZ, base);
      assert (tx[SZ - 1] < base);
      fputc (hexdig[tx[SZ - 1]], stdout);
    }
}
void fmpz_mul_ui(fmpz_t output, const fmpz_t input, const unsigned long x)
{
   if (x == 0)
   {
      output[0] = 0;
      return;
   }

   mp_limb_t mslimb;

   if (output[0] = input[0])  // assignment intended, not a typo: copy the
                              // sign/size limb and test it for zero
   {
      mslimb = mpn_mul_1(output + 1, input + 1, FLINT_ABS(input[0]), x);
      if (mslimb)
      {
         output[FLINT_ABS(input[0]) + 1] = mslimb;
         if ((long) output[0] > 0) output[0]++;
         else output[0]--;
      }
   }
}
mp_size_t
mpn_prod_limbs_direct(mp_limb_t * result, const mp_limb_t * factors,
                      mp_size_t n)
{
    mp_size_t k, len;
    mp_limb_t top;

    if (n < 1)
    {
        result[0] = 1UL;
        return 1;
    }

    result[0] = factors[0];
    len = 1;

    for (k = 1; k < n; k++)
    {
        top = mpn_mul_1(result, result, len, factors[k]);
        if (top)
        {
            result[len] = top;
            len++;
        }
    }

    return len;
}
// Montgomery multiplication.
// See Blake, Seroussi and Smart.
static inline void mont_mul(mp_limb_t *c, mp_limb_t *a, mp_limb_t *b, fptr p) {
  // Instead of right shifting every iteration
  // I allocate more room for the z array.
  size_t i, t = p->limbs;
#ifdef _MSC_VER
  // for VC++ compatibility
  mp_limb_t z[2 * MAX_LIMBS + 1];
#else
  mp_limb_t z[2 * t + 1];
#endif

  mp_limb_t u = (a[0] * b[0]) * p->negpinv;
  mp_limb_t v = z[t] = mpn_mul_1(z, b, t, a[0]);
  z[t] += mpn_addmul_1(z, p->primelimbs, t, u);
  z[t + 1] = z[t] < v;  // Handle overflow.

  for (i = 1; i < t; i++) {
    u = (z[i] + a[i] * b[0]) * p->negpinv;
    v = z[t + i] += mpn_addmul_1(z + i, b, t, a[i]);
    z[t + i] += mpn_addmul_1(z + i, p->primelimbs, t, u);
    z[t + i + 1] = z[t + i] < v;
  }

  if (z[t * 2] || mpn_cmp(z + t, p->primelimbs, t) >= 0) {
    mpn_sub_n(c, z + t, p->primelimbs, t);
  } else {
    memcpy(c, z + t, t * sizeof(mp_limb_t));
    // Doesn't seem to make a difference:
    /*
    mpz_t z1, z2;
    z1->_mp_d = c;
    z2->_mp_d = z + t;
    z1->_mp_size = z1->_mp_alloc = z2->_mp_size = z2->_mp_alloc = t;
    mpz_set(z1, z2);
    */
  }
}
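What the loop above computes (an editorial note, assuming p->negpinv holds -p^{-1} mod B with B = 2^GMP_NUMB_BITS and t = p->limbs): each iteration adds a[i]*b plus a multiple u*p chosen so that the lowest untreated limb of z becomes zero, which is the standard REDC step; reading the result from z + t then amounts to dividing by B^t, so after the final conditional subtraction c holds the Montgomery product a*b*R^{-1} mod p with R = B^t.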
void
_nmod_poly_divrem_q1(mp_ptr Q, mp_ptr R,
                     mp_srcptr A, long lenA, mp_srcptr B, long lenB,
                     nmod_t mod)
{
    const mp_limb_t invL = (B[lenB-1] == 1) ? 1 : n_invmod(B[lenB-1], mod.n);

    if (lenB == 1)
    {
        _nmod_vec_scalar_mul_nmod(Q, A, lenA, invL, mod);
    }
    else
    {
        mp_limb_t t;

        Q[1] = n_mulmod2_preinv(A[lenA-1], invL, mod.n, mod.ninv);

        t = n_mulmod2_preinv(Q[1], B[lenB-2], mod.n, mod.ninv);
        t = n_submod(A[lenA-2], t, mod.n);
        Q[0] = n_mulmod2_preinv(t, invL, mod.n, mod.ninv);

        if (FLINT_BITS + 2 <= 2 * mod.norm)
        {
            mpn_mul_1(R, B, lenB - 1, Q[0]);
            if (lenB > 2)
                mpn_addmul_1(R + 1, B, lenB - 2, Q[1]);
            _nmod_vec_reduce(R, R, lenB - 1, mod);
        }
        else
        {
            _nmod_vec_scalar_mul_nmod(R, B, lenB - 1, Q[0], mod);
            if (lenB > 2)
                _nmod_vec_scalar_addmul_nmod(R + 1, B, lenB - 2, Q[1], mod);
        }

        _nmod_vec_sub(R, A, R, lenB - 1, mod);
    }
}
dig_t fp_mul1_low(dig_t *c, const dig_t *a, dig_t digit) {
	return mpn_mul_1(c, a, FP_DIGS, digit);
}
void tc4_addmul_1(mp_ptr wp, mp_size_t * wn, mp_srcptr xp, mp_size_t xn, mp_limb_t y)
{
  mp_size_t sign, wu, xu, ws, new_wn, min_size, dsize;
  mp_limb_t cy;

  /* w unaffected if x==0 or y==0 */
  if (xn == 0 || y == 0)
    return;

  sign = xn;
  xu = ABS (xn);

  ws = *wn;
  if (*wn == 0)
  {
      /* nothing to add to, just set x*y, "sign" gives the sign */
      cy = mpn_mul_1 (wp, xp, xu, y);
      if (cy)
      {
        wp[xu] = cy;
        xu = xu + 1;
      }
      *wn = (sign >= 0 ? xu : -xu);
      return;
  }

  sign ^= *wn;
  wu = ABS (*wn);

  new_wn = MAX (wu, xu);
  min_size = MIN (wu, xu);

  if (sign >= 0)
  {
      /* addmul of absolute values */

      cy = mpn_addmul_1 (wp, xp, min_size, y);

      dsize = xu - wu;
#if HAVE_NATIVE_mpn_mul_1c
      if (dsize > 0)
        cy = mpn_mul_1c (wp + min_size, xp + min_size, dsize, y, cy);
      else if (dsize < 0)
      {
          dsize = -dsize;
          cy = mpn_add_1 (wp + min_size, wp + min_size, dsize, cy);
      }
#else
      if (dsize != 0)
      {
          mp_limb_t cy2;
          if (dsize > 0)
            cy2 = mpn_mul_1 (wp + min_size, xp + min_size, dsize, y);
          else
          {
              dsize = -dsize;
              cy2 = 0;
          }
          cy = cy2 + mpn_add_1 (wp + min_size, wp + min_size, dsize, cy);
      }
#endif

      if (cy)
      {
          wp[dsize + min_size] = cy;
          new_wn++;
      }
  }
  else
  {
      /* submul of absolute values */

      cy = mpn_submul_1 (wp, xp, min_size, y);
      if (wu >= xu)
      {
          /* if w bigger than x, then propagate borrow through it */
          if (wu != xu)
            cy = mpn_sub_1 (wp + xu, wp + xu, wu - xu, cy);

          if (cy != 0)
          {
              /* Borrow out of w, take twos complement negative to get
                 absolute value, flip sign of w.  */
              wp[new_wn] = ~-cy;  /* extra limb is 0-cy */
              mpn_not (wp, new_wn);
              new_wn++;
              MPN_INCR_U (wp, new_wn, CNST_LIMB(1));
              ws = -*wn;
          }
      }
      else /* wu < xu */
      {
          /* x bigger than w, so want x*y-w.  Submul has given w-x*y, so
             take twos complement and use an mpn_mul_1 for the rest.  */

          mp_limb_t cy2;

          /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */
          mpn_not (wp, wu);
          cy += mpn_add_1 (wp, wp, wu, CNST_LIMB(1));
          cy -= 1;

          /* If cy-1 == -1 then hold that -1 for later.  mpn_submul_1 never
             returns cy==MP_LIMB_T_MAX so that value always indicates a -1.  */
          cy2 = (cy == MP_LIMB_T_MAX);
          cy += cy2;
          MPN_MUL_1C (cy, wp + wu, xp + wu, xu - wu, y, cy);
          wp[new_wn] = cy;
          new_wn += (cy != 0);

          /* Apply any -1 from above.  The value at wp+wsize is non-zero
             because y!=0 and the high limb of x will be non-zero.  */
          if (cy2)
            MPN_DECR_U (wp + wu, new_wn - wu, CNST_LIMB(1));

          ws = -*wn;
      }

      /* submul can produce high zero limbs due to cancellation, both when w
         has more limbs or x has more  */
      MPN_NORMALIZE (wp, new_wn);
  }

  *wn = (ws >= 0 ? new_wn : -new_wn);

  ASSERT (new_wn == 0 || wp[new_wn - 1] != 0);
}
int
mpfr_mul_ui (mpfr_ptr y, mpfr_srcptr x, unsigned long int u, mpfr_rnd_t rnd_mode)
{
  mp_limb_t *yp;
  mp_size_t xn;
  int cnt, inexact;
  MPFR_TMP_DECL (marker);

  if (MPFR_UNLIKELY (MPFR_IS_SINGULAR (x)))
    {
      if (MPFR_IS_NAN (x))
        {
          MPFR_SET_NAN (y);
          MPFR_RET_NAN;
        }
      else if (MPFR_IS_INF (x))
        {
          if (u != 0)
            {
              MPFR_SET_INF (y);
              MPFR_SET_SAME_SIGN (y, x);
              MPFR_RET (0); /* infinity is exact */
            }
          else /* 0 * infinity */
            {
              MPFR_SET_NAN (y);
              MPFR_RET_NAN;
            }
        }
      else /* x is zero */
        {
          MPFR_ASSERTD (MPFR_IS_ZERO (x));
          MPFR_SET_ZERO (y);
          MPFR_SET_SAME_SIGN (y, x);
          MPFR_RET (0); /* zero is exact */
        }
    }
  else if (MPFR_UNLIKELY (u <= 1))
    {
      if (u < 1)
        {
          MPFR_SET_ZERO (y);
          MPFR_SET_SAME_SIGN (y, x);
          MPFR_RET (0); /* zero is exact */
        }
      else
        return mpfr_set (y, x, rnd_mode);
    }
  else if (MPFR_UNLIKELY (IS_POW2 (u)))
    return mpfr_mul_2si (y, x, MPFR_INT_CEIL_LOG2 (u), rnd_mode);

  yp = MPFR_MANT (y);
  xn = MPFR_LIMB_SIZE (x);

  MPFR_ASSERTD (xn < MP_SIZE_T_MAX);
  MPFR_TMP_MARK (marker);
  yp = MPFR_TMP_LIMBS_ALLOC (xn + 1);

  MPFR_ASSERTN (u == (mp_limb_t) u);
  yp[xn] = mpn_mul_1 (yp, MPFR_MANT (x), xn, u);

  /* x * u is stored in yp[xn], ..., yp[0] */

  /* since the case u=1 was treated above, we have u >= 2, thus
     yp[xn] >= 1 since x was msb-normalized */
  MPFR_ASSERTD (yp[xn] != 0);
  if (MPFR_LIKELY (MPFR_LIMB_MSB (yp[xn]) == 0))
    {
      count_leading_zeros (cnt, yp[xn]);
      mpn_lshift (yp, yp, xn + 1, cnt);
    }
  else
    {
      cnt = 0;
    }

  /* now yp[xn], ..., yp[0] is msb-normalized too, and has at most
     PREC(x) + (GMP_NUMB_BITS - cnt) non-zero bits */
  MPFR_RNDRAW (inexact, y, yp, (mpfr_prec_t) (xn + 1) * GMP_NUMB_BITS,
               rnd_mode, MPFR_SIGN (x), cnt -- );

  MPFR_TMP_FREE (marker);

  cnt = GMP_NUMB_BITS - cnt;
  if (MPFR_UNLIKELY (__gmpfr_emax < MPFR_EMAX_MIN + cnt
                     || MPFR_GET_EXP (x) > __gmpfr_emax - cnt))
    return mpfr_overflow (y, rnd_mode, MPFR_SIGN (x));

  MPFR_SET_EXP (y, MPFR_GET_EXP (x) + cnt);
  MPFR_SET_SAME_SIGN (y, x);

  return inexact;
}
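A minimal usage sketch through the public MPFR interface (an editorial addition): 1.5 * 10 = 15 is exactly representable at 53-bit precision, so the ternary return value of mpfr_mul_ui is 0.

#include <stdio.h>
#include <mpfr.h>

int
main (void)
{
  mpfr_t x, y;

  mpfr_init2 (x, 53);
  mpfr_init2 (y, 53);
  mpfr_set_d (x, 1.5, MPFR_RNDN);

  int inexact = mpfr_mul_ui (y, x, 10, MPFR_RNDN);  /* y = 15, exact */
  mpfr_printf ("%.5Rf (inexact = %d)\n", y, inexact);

  mpfr_clear (x);
  mpfr_clear (y);
  return 0;
}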
void compute_B_terms(QS_t * qs_inf, poly_t * poly_inf)
{
   unsigned long s = poly_inf->s;
   unsigned long * A_ind = poly_inf->A_ind;
   unsigned long * A_modp = poly_inf->A_modp;
   unsigned long * B_terms = poly_inf->B_terms;
   prime_t * factor_base = qs_inf->factor_base;
   unsigned long limbs = qs_inf->prec + 1;
   unsigned long limbs2;
   unsigned long * A = poly_inf->A;
   unsigned long * B = poly_inf->B;
   unsigned long p, i;
   unsigned long * temp1 = (unsigned long *) flint_stack_alloc(limbs);
   unsigned long temp;
   mp_limb_t msl;
   double pinv;

   for (i = 0; i < s; i++)
   {
      p = factor_base[A_ind[i]].p;
      pinv = z_precompute_inverse(p);
      mpn_divmod_1(temp1 + 1, A + 1, A[0], p);
      temp1[0] = A[0] - (temp1[A[0]] == 0);
      A_modp[i] = (temp = mpn_mod_1(temp1 + 1, temp1[0], p));
      temp = z_invert(temp, p);
      temp = z_mulmod_precomp(temp, qs_inf->sqrts[A_ind[i]], p, pinv);
      if (temp > p/2) temp = p - temp;
      msl = mpn_mul_1(B_terms + i*limbs + 1, temp1 + 1, temp1[0], temp);
      if (msl)
      {
         B_terms[i*limbs + temp1[0] + 1] = msl;
         B_terms[i*limbs] = temp1[0] + 1;
      }
      else B_terms[i*limbs] = temp1[0];
#if B_TERMS
      mpz_t temp;
      mpz_init(temp);
      fmpz_to_mpz(temp, B_terms + i*limbs);
      gmp_printf("B_%ld = %Zd\n", i, temp);
      mpz_clear(temp);
#endif
   }

   F_mpn_copy(B, B_terms, B_terms[0] + 1);  // Set B to the sum of the B terms
   if (limbs > B_terms[0] + 1)
      F_mpn_clear(B + B_terms[0] + 1, limbs - B_terms[0] - 1);
   for (i = 1; i < s; i++)
   {
      limbs2 = B_terms[i*limbs];
      msl = mpn_add_n(B + 1, B + 1, B_terms + i*limbs + 1, limbs2);
      if (msl) mpn_add_1(B + limbs2 + 1, B + limbs2 + 1, limbs - limbs2 - 1, msl);
   }
   B[0] = limbs - 1;
   while (!B[B[0]] && B[0]) B[0]--;
#if B_TERMS
   mpz_t temp2;
   mpz_init(temp2);
   fmpz_to_mpz(temp2, B);
   gmp_printf("B = %Zd\n", temp2);
   mpz_clear(temp2);
#endif

   flint_stack_release();  // release temp1
}
mp_limb_t
mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
{
  mp_limb_t cy[n];
  mp_limb_t a, b, r, s0, s1, c0, c1;
  mp_size_t i;
  int more_carries;

  if (up == rp)
    {
      /* The algorithm used below cannot handle overlap.  Handle it here by
         making a temporary copy of the source vector, then call ourselves.  */
      mp_limb_t xp[n];
      MPN_COPY (xp, up, n);
      return mpn_mul_1 (rp, xp, n, vl);
    }

  a = up[0] * vl;
  rp[0] = a;
  cy[0] = 0;

  /* Main multiply loop.  Generate a raw accumulated output product in rp[]
     and a carry vector in cy[].  */
#pragma _CRI ivdep
  for (i = 1; i < n; i++)
    {
      a = up[i] * vl;
      b = _int_mult_upper (up[i - 1], vl);
      s0 = a + b;
      c0 = ((a & b) | ((a | b) & ~s0)) >> 63;
      rp[i] = s0;
      cy[i] = c0;
    }

  /* Carry add loop.  Add the carry vector cy[] to the raw sum rp[] and
     store the new sum back to rp[0].  */
  more_carries = 0;
#pragma _CRI ivdep
  for (i = 2; i < n; i++)
    {
      r = rp[i];
      c0 = cy[i - 1];
      s0 = r + c0;
      rp[i] = s0;
      c0 = (r & ~s0) >> 63;
      more_carries += c0;
    }

  /* If that second loop generated carry, handle that in scalar loop.  */
  if (more_carries)
    {
      mp_limb_t cyrec = 0;
      /* Look for places where rp[k] is zero and cy[k-1] is non-zero.
         These are where we got a recurrency carry.  */
      for (i = 2; i < n; i++)
        {
          r = rp[i];
          c0 = (r == 0 && cy[i - 1] != 0);
          s0 = r + cyrec;
          rp[i] = s0;
          c1 = (r & ~s0) >> 63;
          cyrec = c0 | c1;
        }
      return _int_mult_upper (up[n - 1], vl) + cyrec + cy[n - 1];
    }

  return _int_mult_upper (up[n - 1], vl) + cy[n - 1];
}
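A cross-check sketch for the vectorized routine above (an editorial addition, assuming 64-bit nail-free limbs and a compiler providing unsigned __int128; ref_mul_1 and check_mul_1 are hypothetical helper names): the result and the returned carry must match a plain schoolbook reference.

#include <assert.h>
#include <gmp.h>

static mp_limb_t
ref_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
{
  mp_limb_t cy = 0;
  for (mp_size_t i = 0; i < n; i++)
    {
      unsigned __int128 t = (unsigned __int128) up[i] * vl + cy;
      rp[i] = (mp_limb_t) t;      /* low 64 bits */
      cy = (mp_limb_t) (t >> 64); /* carry into the next limb */
    }
  return cy;
}

void
check_mul_1 (mp_srcptr up, mp_size_t n, mp_limb_t vl)
{
  mp_limb_t got[64], want[64];

  assert (n >= 1 && n <= 64);
  mp_limb_t cg = mpn_mul_1 (got, up, n, vl);
  mp_limb_t cw = ref_mul_1 (want, up, n, vl);
  assert (cg == cw && mpn_cmp (got, want, n) == 0);
}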