/* Multiply M by M1 from the right.  Since the M1 elements fit in
   GMP_NUMB_BITS - 1 bits, M grows by at most one limb.  Needs
   temporary space M->n. */
static void
ngcd_matrix_mul_1 (struct ngcd_matrix *M, const struct ngcd_matrix1 *M1)
{
  unsigned row;
  mp_limb_t grow;

  for (row = 0, grow = 0; row < 2; row++)
    {
      mp_limb_t c0, c1;

      /* Compute (u, u') <-- (r00 u + r10 u', r01 u + r11 u') as

         t   = u
         u  *= r00
         u  += r10 * u'
         u' *= r11
         u' += r01 * t
      */
      MPN_COPY (M->tp, M->p[row][0], M->n);
      c0 = mpn_mul_1 (M->p[row][0], M->p[row][0], M->n, M1->u[0][0]);
      c0 += mpn_addmul_1 (M->p[row][0], M->p[row][1], M->n, M1->u[1][0]);
      M->p[row][0][M->n] = c0;

      c1 = mpn_mul_1 (M->p[row][1], M->p[row][1], M->n, M1->u[1][1]);
      c1 += mpn_addmul_1 (M->p[row][1], M->tp, M->n, M1->u[0][1]);
      M->p[row][1][M->n] = c1;

      grow |= (c0 | c1);
    }
  M->n += (grow != 0);
  ASSERT (M->n < M->alloc);
}
/* (rp, 2n) = (xp, n)*(yp, n) / B^n */
inline static void
mpn_mulshort_n_basecase (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
  mp_size_t i, k;
#if GMP_NAIL_BITS == 0
  mp_limb_t t1, t2, t3;
#endif

  ASSERT (n >= 3);  /* this restriction doesn't make a lot of sense in general */
  ASSERT_MPN (xp, n);
  ASSERT_MPN (yp, n);
  ASSERT (!MPN_OVERLAP_P (rp, 2 * n, xp, n));
  ASSERT (!MPN_OVERLAP_P (rp, 2 * n, yp, n));

  k = n - 2;  /* so want short product sum_{i + j >= k} x[i] y[j] B^(i + j) */
#if GMP_NAIL_BITS != 0
  rp[n] = mpn_mul_1 (rp + k, xp + k, 2, yp[0]);
#else
  umul_ppmm (t1, rp[k], xp[k], yp[0]);
  umul_ppmm (t3, t2, xp[k + 1], yp[0]);
  add_ssaaaa (rp[n], rp[k + 1], t3, t2, 0, t1);
#endif
  for (i = 1; i <= n - 2; i++)
    rp[n + i] = mpn_addmul_1 (rp + k, xp + k - i, 2 + i, yp[i]);
  rp[n + n - 1] = mpn_addmul_1 (rp + n - 1, xp, n, yp[n - 1]);
  return;
}
/* Accumulate into c the contribution of the digit a[0] to the square of
   {a, size}: a[0] * {a, size}, plus a second copy of the cross products
   a[0] * {a + 1, size - 1} shifted up one digit. */
void bn_sqra_low(dig_t *c, const dig_t *a, int size) {
	dig_t carry, digit = *a;

	carry = mpn_addmul_1(c, a, size, digit);
	mpn_add_1(c + size, c + size, size, carry);
	if (size > 1) {
		carry = mpn_addmul_1(c + 1, a + 1, size - 1, digit);
		mpn_add_1(c + size, c + size, size, carry);
	}
}
/* For testing purposes, define our own mpn_addmul_2 if there is none
   already available. */
#ifndef HAVE_NATIVE_mpn_addmul_2
mp_limb_t
mpn_addmul_2 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_srcptr vp)
{
  rp[n] = mpn_addmul_1 (rp, up, n, vp[0]);
  return mpn_addmul_1 (rp + 1, up, n, vp[1]);
}
#endif
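/* A minimal sanity check for the generic mpn_addmul_2 above -- a sketch,
   not from the original source: the sizes and values are made up, and it
   assumes <gmp.h> for the public mpn_mul/mpn_add_n/mpn_cmp routines.
   mpn_addmul_2 must agree with r += u * v computed via a full product. */
#include <assert.h>
#include <string.h>
#include <gmp.h>

static void
check_addmul_2 (void)
{
  mp_limb_t u[3] = { 5, 6, 7 };
  mp_limb_t v[2] = { 9, 11 };
  mp_limb_t r[5] = { 1, 2, 3, 0, 0 };   /* n-limb accumulator plus 2 high limbs */
  mp_limb_t want[5], prod[5];

  /* Reference: want = r + u * v, via the public mpn routines. */
  memcpy (want, r, sizeof (want));
  mpn_mul (prod, u, 3, v, 2);           /* prod = u * v, 5 limbs */
  mpn_add_n (want, want, prod, 5);

  /* mpn_addmul_2 writes the low n+1 limbs and returns the top limb. */
  r[4] = mpn_addmul_2 (r, u, 3, v);
  assert (mpn_cmp (r, want, 5) == 0);
}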
void
ecc_modp_addmul_1 (const struct ecc_curve *ecc, mp_limb_t *rp,
                   const mp_limb_t *ap, mp_limb_t b)
{
  mp_limb_t hi;

  assert (b <= 0xffffffff);

  hi = mpn_addmul_1 (rp, ap, ecc->size, b);
  hi = mpn_addmul_1 (rp, ecc->Bmodp, ecc->size, hi);
  assert (hi <= 1);
  hi = cnd_add_n (hi, rp, ecc->Bmodp, ecc->size);
  /* Sufficient roughly if b < B^size / p */
  assert (hi == 0);
}
void
_nmod_poly_div_basecase_2(mp_ptr Q, mp_ptr W, mp_srcptr A, long A_len,
                          mp_srcptr B, long B_len, nmod_t mod)
{
    long coeff, i, len;
    mp_limb_t lead_inv = n_invmod(B[B_len - 1], mod.n);
    mp_ptr B2, R2;
    mp_srcptr Btop;

    B2 = W;
    for (i = 0; i < B_len - 1; i++)
    {
        B2[2 * i] = B[i];
        B2[2 * i + 1] = 0;
    }
    Btop = B2 + 2 * (B_len - 1);

    R2 = W + 2 * (B_len - 1);
    for (i = 0; i < A_len - B_len + 1; i++)
    {
        R2[2 * i] = A[B_len + i - 1];
        R2[2 * i + 1] = 0;
    }

    coeff = A_len - B_len;
    while (coeff >= 0)
    {
        mp_limb_t r_coeff;

        r_coeff = n_ll_mod_preinv(R2[2 * coeff + 1], R2[2 * coeff],
                                  mod.n, mod.ninv);

        while (coeff >= 0 && r_coeff == 0L)
        {
            Q[coeff--] = 0L;
            if (coeff >= 0)
                r_coeff = n_ll_mod_preinv(R2[2 * coeff + 1], R2[2 * coeff],
                                          mod.n, mod.ninv);
        }

        if (coeff >= 0)
        {
            mp_limb_t c, *R_sub;

            Q[coeff] = n_mulmod2_preinv(r_coeff, lead_inv, mod.n, mod.ninv);
            c = n_negmod(Q[coeff], mod.n);

            len = FLINT_MIN(B_len - 1, coeff);
            R_sub = R2 + 2 * (coeff - len);
            if (len > 0)
                mpn_addmul_1(R_sub, Btop - 2 * len, 2 * len, c);

            coeff--;
        }
    }
}
/* puts in {rp, n} the low part of {np, n} times {mp, n},
   i.e. equivalent to:

   mp_ptr tp;
   TMP_DECL(marker);
   TMP_MARK(marker);
   tp = TMP_ALLOC_LIMBS (2 * n);
   mpn_mul_n (tp, np, mp, n);
   MPN_COPY (rp, tp, n);
   TMP_FREE(marker);
*/
void
ecm_mul_lo_basecase (mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n)
{
  mpn_mul_1 (rp, np, n, mp[0]);
  for (; --n;)
    mpn_addmul_1 (++rp, np, n, (++mp)[0]);
}
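/* A quick consistency check for ecm_mul_lo_basecase -- a sketch with
   arbitrary values, assuming <gmp.h> and <assert.h>: the result must equal
   the low n limbs of the full product, per the comment above. */
#include <assert.h>
#include <gmp.h>

static void
check_mul_lo (void)
{
  mp_limb_t np[4] = { 0x123, 0x456, 0x789, 0xabc };
  mp_limb_t mp[4] = { 0xdef, 0x135, 0x246, 0x357 };
  mp_limb_t lo[4], full[8];

  ecm_mul_lo_basecase (lo, np, mp, 4);
  mpn_mul_n (full, np, mp, 4);          /* full 8-limb product */
  assert (mpn_cmp (lo, full, 4) == 0);
}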
void
_nmod_poly_divrem_basecase_1(mp_ptr Q, mp_ptr R, mp_ptr W,
                             mp_srcptr A, slong lenA,
                             mp_srcptr B, slong lenB, nmod_t mod)
{
    const mp_limb_t invL = n_invmod(B[lenB - 1], mod.n);
    slong iR;
    mp_ptr ptrQ = Q - lenB + 1;
    mp_ptr R1 = W;

    flint_mpn_copyi(R1, A, lenA);

    for (iR = lenA - 1; iR >= lenB - 1; iR--)
    {
        if (R1[iR] == 0)
        {
            ptrQ[iR] = WORD(0);
        }
        else
        {
            ptrQ[iR] = n_mulmod2_preinv(R1[iR], invL, mod.n, mod.ninv);

            if (lenB > 1)
            {
                const mp_limb_t c = n_negmod(ptrQ[iR], mod.n);
                mpn_addmul_1(R1 + iR - lenB + 1, B, lenB - 1, c);
            }
        }
    }

    if (lenB > 1)
        _nmod_vec_reduce(R, R1, lenB - 1, mod);
}
/* Montgomery reduction of the double-width value in a modulo the configured
   prime m, writing the result to c; u is -1/m mod B (the precomputed
   reduction constant). */
void fp_rdcn_low(dig_t *c, dig_t *a) {
	int i;
	dig_t r, c0, c1, u, *tmp;
	const dig_t *m;

	u = *(fp_prime_get_rdc());
	m = fp_prime_get();

	tmp = a;
	c1 = 0;
	for (i = 0; i < FP_DIGS; i++, tmp++) {
		r = (dig_t)(*tmp * u);
		c0 = mpn_addmul_1(tmp, m, FP_DIGS, r);
		c1 += mpn_add_1(tmp + FP_DIGS, tmp + FP_DIGS, FP_DIGS - i, c0);
	}
	for (i = 0; i < FP_DIGS; i++, tmp++) {
		c[i] = *tmp;
	}
	for (i = 0; i < c1; i++) {
		fp_subn_low(c, c, m);
	}
	if (fp_cmp(c, m) != CMP_LT) {
		fp_subn_low(c, c, m);
	}
}
void
mpn_mullo_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  mp_limb_t h;

  h = up[0] * vp[n - 1];

  if (n != 1)
    {
      mp_size_t i;
      mp_limb_t v0;

      v0 = *vp++;
      h += up[n - 1] * v0 + mpn_mul_1 (rp, up, n - 1, v0);
      rp++;

      for (i = n - 2; i > 0; i--)
        {
          v0 = *vp++;
          h += up[i] * v0 + mpn_addmul_1 (rp, up, i, v0);
          rp++;
        }
    }

  rp[0] = h;
}
void
mpn_redc_2 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr mip)
{
  mp_limb_t q[2];
  mp_size_t j;
  mp_limb_t upn;
  mp_limb_t cy;

  ASSERT_MPN (up, 2*n);

  if ((n & 1) != 0)
    {
      up[0] = mpn_addmul_1 (up, mp, n, (up[0] * mip[0]) & GMP_NUMB_MASK);
      up++;
    }

  for (j = n - 2; j >= 0; j -= 2)
    {
      umul2low (q[1], q[0], mip[1], mip[0], up[1], up[0]);
      upn = up[n];              /* mpn_addmul_2 overwrites this */
      up[1] = mpn_addmul_2 (up, mp, n, q);
      up[0] = up[n];
      up[n] = upn;
      up += 2;
    }

  cy = mpn_add_n (rp, up, up - n, n);
  if (cy != 0)
    mpn_sub_n (rp, rp, mp, n);
}
void
_nmod_poly_rem_basecase_1(mp_ptr R, mp_ptr W, mp_srcptr A, long lenA,
                          mp_srcptr B, long lenB, nmod_t mod)
{
    if (lenB > 1)
    {
        const mp_limb_t invL = n_invmod(B[lenB - 1], mod.n);
        long iR;
        mp_ptr R1 = W;

        mpn_copyi(R1, A, lenA);

        for (iR = lenA - 1; iR >= lenB - 1; iR--)
        {
            if (R1[iR] != 0)
            {
                const mp_limb_t q = n_mulmod2_preinv(R1[iR], invL,
                                                     mod.n, mod.ninv);
                const mp_limb_t c = n_negmod(q, mod.n);

                mpn_addmul_1(R1 + iR - lenB + 1, B, lenB - 1, c);
            }
        }

        _nmod_vec_reduce(R, R1, lenB - 1, mod);
    }
}
/* Assumes poly1 and poly2 are not length 0 and 0 < trunc <= len1 + len2 - 1 */
void
_nmod_poly_mullow_classical(mp_ptr res, mp_srcptr poly1, slong len1,
                            mp_srcptr poly2, slong len2, slong trunc,
                            nmod_t mod)
{
    if (len1 == 1 || trunc == 1)  /* Special case if the length of output is 1 */
    {
        res[0] = n_mulmod2_preinv(poly1[0], poly2[0], mod.n, mod.ninv);
    }
    else  /* Ordinary case */
    {
        slong i;
        slong bits = FLINT_BITS - (slong) mod.norm;
        slong log_len = FLINT_BIT_COUNT(len2);

        if (2 * bits + log_len <= FLINT_BITS)
        {
            /* Each product fits in 2*bits bits and at most 2^log_len of them
               are summed per coefficient, so every coefficient fits in one
               limb: work unreduced with mpn_mul_1/mpn_addmul_1 and reduce
               only once at the end. */

            /* Set res[i] = poly1[i]*poly2[0] */
            mpn_mul_1(res, poly1, FLINT_MIN(len1, trunc), poly2[0]);

            if (len2 != 1)
            {
                /* Set res[i+len1-1] = poly1[len1-1]*poly2[i] */
                if (trunc > len1)
                    mpn_mul_1(res + len1, poly2 + 1, trunc - len1,
                              poly1[len1 - 1]);

                /* res[i+j] += poly1[i]*poly2[j] */
                for (i = 0; i < FLINT_MIN(len1, trunc) - 1; i++)
                    mpn_addmul_1(res + i + 1, poly2 + 1,
                                 FLINT_MIN(len2, trunc - i) - 1, poly1[i]);
            }

            _nmod_vec_reduce(res, res, trunc, mod);
        }
        else
        {
            /* Set res[i] = poly1[i]*poly2[0] */
            _nmod_vec_scalar_mul_nmod(res, poly1, FLINT_MIN(len1, trunc),
                                      poly2[0], mod);

            if (len2 == 1)
                return;

            /* Set res[i+len1-1] = poly1[len1-1]*poly2[i] */
            if (trunc > len1)
                _nmod_vec_scalar_mul_nmod(res + len1, poly2 + 1,
                                          trunc - len1, poly1[len1 - 1], mod);

            /* res[i+j] += poly1[i]*poly2[j] */
            for (i = 0; i < FLINT_MIN(len1, trunc) - 1; i++)
                _nmod_vec_scalar_addmul_nmod(res + i + 1, poly2 + 1,
                                             FLINT_MIN(len2, trunc - i) - 1,
                                             poly1[i], mod);
        }
    }
}
void
mpn_mullow_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  mp_size_t i;

  mpn_mul_1 (rp, up, n, vp[0]);
  for (i = 1; i < n; i++)
    mpn_addmul_1 (rp + i, up, n - i, vp[i]);
}
/* Put in rp[0..n] the n+1 low limbs of {up, n} * {vp, n}.
   Assume 2n limbs are allocated at rp. */
static void
mpfr_mullow_n_basecase (mpfr_limb_ptr rp, mpfr_limb_srcptr up,
                        mpfr_limb_srcptr vp, mp_size_t n)
{
  mp_size_t i;

  rp[n] = mpn_mul_1 (rp, up, n, vp[0]);
  for (i = 1 ; i < n ; i++)
    mpn_addmul_1 (rp + i, up, n - i + 1, vp[i]);
}
static mp_limb_t
DO_mpn_addlsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s,
                 mp_ptr ws)
{
#if USE_MUL_1 && 0
  return mpn_addmul_1 (dst, src, n, CNST_LIMB(1) << (s));
#else
  mp_limb_t __cy;
  __cy = mpn_lshift (ws, src, n, s);
  return __cy + mpn_add_n (dst, dst, ws, n);
#endif
}
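/* Small equivalence check for the two branches of DO_mpn_addlsh_n above --
   a sketch with made-up values, assuming <gmp.h> and <assert.h>:
   dst + (src << s) via mpn_addmul_1 by the limb 2^s must match
   mpn_lshift followed by mpn_add_n. */
#include <assert.h>
#include <gmp.h>

static void
check_addlsh (void)
{
  mp_limb_t src[3] = { 0x1111, 0x2222, 0x3333 };
  mp_limb_t d1[3] = { 5, 6, 7 };
  mp_limb_t d2[3] = { 5, 6, 7 };
  mp_limb_t ws[3];
  unsigned int s = 4;           /* must satisfy 1 <= s < GMP_NUMB_BITS */
  mp_limb_t c1, c2;

  c1 = mpn_addmul_1 (d1, src, 3, (mp_limb_t) 1 << s);
  c2 = mpn_lshift (ws, src, 3, s);
  c2 += mpn_add_n (d2, d2, ws, 3);

  assert (c1 == c2);
  assert (mpn_cmp (d1, d2, 3) == 0);
}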
void
mpn_sbpi1_bdiv_q (mp_ptr qp,
                  mp_ptr np, mp_size_t nn,
                  mp_srcptr dp, mp_size_t dn,
                  mp_limb_t dinv)
{
  mp_size_t i;
  mp_limb_t cy, q;

  ASSERT (dn > 0);
  ASSERT (nn >= dn);
  ASSERT ((dp[0] & 1) != 0);
  /* FIXME: Add ASSERTs for allowable overlapping; i.e., that qp = np is OK,
     but some over N/Q overlaps will not work. */

  for (i = nn - dn; i > 0; i--)
    {
      q = dinv * np[0];
      cy = mpn_addmul_1 (np, dp, dn, q);
      mpn_add_1 (np + dn, np + dn, i, cy);
      ASSERT (np[0] == 0);
      qp[0] = ~q;
      qp++;
      np++;
    }

  for (i = dn; i > 1; i--)
    {
      q = dinv * np[0];
      mpn_addmul_1 (np, dp, i, q);
      ASSERT (np[0] == 0);
      qp[0] = ~q;
      qp++;
      np++;
    }

  /* Final limb */
  q = dinv * np[0];
  qp[0] = ~q;
  mpn_add_1 (qp - nn + 1, qp - nn + 1, nn, 1);
}
void
_nmod_poly_divrem_basecase_3(mp_ptr Q, mp_ptr R, mp_ptr W,
                             mp_srcptr A, slong lenA,
                             mp_srcptr B, slong lenB, nmod_t mod)
{
    const mp_limb_t invL = n_invmod(B[lenB - 1], mod.n);
    slong iR, i;
    mp_ptr B3 = W, R3 = W + 3*(lenB - 1), ptrQ = Q - lenB + 1;

    for (i = 0; i < lenB - 1; i++)
    {
        B3[3 * i] = B[i];
        B3[3 * i + 1] = 0;
        B3[3 * i + 2] = 0;
    }
    for (i = 0; i < lenA; i++)
    {
        R3[3 * i] = A[i];
        R3[3 * i + 1] = 0;
        R3[3 * i + 2] = 0;
    }

    for (iR = lenA - 1; iR >= lenB - 1; )
    {
        mp_limb_t r = n_lll_mod_preinv(R3[3 * iR + 2], R3[3 * iR + 1],
                                       R3[3 * iR], mod.n, mod.ninv);

        while ((iR + 1 >= lenB) && (r == WORD(0)))
        {
            ptrQ[iR--] = WORD(0);
            if (iR + 1 >= lenB)
                r = n_lll_mod_preinv(R3[3 * iR + 2], R3[3 * iR + 1],
                                     R3[3 * iR], mod.n, mod.ninv);
        }

        if (iR + 1 >= lenB)
        {
            ptrQ[iR] = n_mulmod2_preinv(r, invL, mod.n, mod.ninv);

            if (lenB > 1)
            {
                const mp_limb_t c = n_negmod(ptrQ[iR], mod.n);
                mpn_addmul_1(R3 + 3 * (iR - lenB + 1), B3, 3 * lenB - 3, c);
            }

            iR--;
        }
    }

    for (iR = 0; iR < lenB - 1; iR++)
        R[iR] = n_lll_mod_preinv(R3[3 * iR + 2], R3[3 * iR + 1],
                                 R3[3 * iR], mod.n, mod.ninv);
}
void
mpn_sbpi1_bdiv_q (mp_ptr qp,
                  mp_ptr np, mp_size_t nn,
                  mp_srcptr dp, mp_size_t dn,
                  mp_limb_t dinv)
{
  mp_size_t i;
  mp_limb_t cy, q;

  ASSERT (dn > 0);
  ASSERT (nn >= dn);
  ASSERT ((dp[0] & 1) != 0);

  for (i = nn - dn; i > 0; i--)
    {
      q = dinv * np[0];
      qp[0] = ~q;
      qp++;
      cy = mpn_addmul_1 (np, dp, dn, q);
      mpn_add_1 (np + dn, np + dn, i, cy);
      ASSERT (np[0] == 0);
      np++;
    }

  for (i = dn; i > 1; i--)
    {
      q = dinv * np[0];
      qp[0] = ~q;
      qp++;
      mpn_addmul_1 (np, dp, i, q);
      ASSERT (np[0] == 0);
      np++;
    }

  /* Final limb */
  q = dinv * np[0];
  qp[0] = ~q;
  mpn_add_1 (qp - nn + 1, qp - nn + 1, nn, 1);
}
/* Put in rp[n..2n-1] an approximation of the n high limbs of
   {up, n} * {vp, n}.  The error is less than n ulps of rp[n] (and the
   approximation is always less or equal to the truncated full product).
   Assume 2n limbs are allocated at rp.

   Implements Algorithm ShortMulNaive from [1]. */
static void
mpfr_mulhigh_n_basecase (mpfr_limb_ptr rp, mpfr_limb_srcptr up,
                         mpfr_limb_srcptr vp, mp_size_t n)
{
  mp_size_t i;

  rp += n - 1;
  umul_ppmm (rp[1], rp[0], up[n-1], vp[0]);
  /* we neglect up[0..n-2]*vp[0], which is less than B^n */
  for (i = 1 ; i < n ; i++)
    /* here, we neglect up[0..n-i-2] * vp[i], which is less than B^n too */
    rp[i + 1] = mpn_addmul_1 (rp, up + (n - i - 1), i + 1, vp[i]);
  /* in total, we neglect less than n*B^n, i.e., n ulps of rp[n]. */
}
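/* Numeric illustration of the error bound stated above -- a sketch with
   made-up operands, assuming mpfr_limb_ptr is mp_ptr as in MPFR and that
   <gmp.h> and <assert.h> are available.  The high half of the basecase
   product never exceeds the high half of the full product, and the gap
   is confined to the lowest limb, where it is at most n. */
#include <assert.h>
#include <gmp.h>

static void
check_mulhigh_bound (void)
{
#define N 4
  mp_limb_t u[N], v[N], approx[2 * N], full[2 * N], diff[N];
  mp_size_t i;

  for (i = 0; i < N; i++)
    {
      u[i] = ~(mp_limb_t) 0 - i;        /* large limbs stress the carries */
      v[i] = (~(mp_limb_t) 0) / 3 + i;
    }

  mpfr_mulhigh_n_basecase (approx, u, v, N);
  mpn_mul_n (full, u, v, N);

  /* No borrow: the approximation is <= the exact high half ... */
  assert (mpn_sub_n (diff, full + N, approx + N, N) == 0);
  /* ... and the difference is confined to the low limb, at most n. */
  for (i = 1; i < N; i++)
    assert (diff[i] == 0);
  assert (diff[0] <= N);
#undef N
}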
void
mpn_mullo_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  mp_size_t i;

  mpn_mul_1 (rp, up, n, vp[0]);
  for (i = n - 1; i > 0; i--)
    {
      vp++;
      rp++;
      mpn_addmul_1 (rp, up, i, vp[0]);
    }
}
// Montgomery multiplication.
// See Blake, Seroussi and Smart.
static inline void mont_mul(mp_limb_t *c, mp_limb_t *a, mp_limb_t *b,
                            fptr p) {
  // Instead of right shifting every iteration
  // I allocate more room for the z array.
  size_t i, t = p->limbs;
#ifdef _MSC_VER
  // for VC++ compatibility
  mp_limb_t z[2 * MAX_LIMBS + 1];
#else
  mp_limb_t z[2 * t + 1];
#endif

  mp_limb_t u = (a[0] * b[0]) * p->negpinv;
  mp_limb_t v = z[t] = mpn_mul_1(z, b, t, a[0]);
  z[t] += mpn_addmul_1(z, p->primelimbs, t, u);
  z[t + 1] = z[t] < v;  // Handle overflow.

  for (i = 1; i < t; i++) {
    u = (z[i] + a[i] * b[0]) * p->negpinv;
    v = z[t + i] += mpn_addmul_1(z + i, b, t, a[i]);
    z[t + i] += mpn_addmul_1(z + i, p->primelimbs, t, u);
    z[t + i + 1] = z[t + i] < v;
  }

  if (z[t * 2] || mpn_cmp(z + t, p->primelimbs, t) >= 0) {
    mpn_sub_n(c, z + t, p->primelimbs, t);
  } else {
    memcpy(c, z + t, t * sizeof(mp_limb_t));
    // Doesn't seem to make a difference:
    /*
    mpz_t z1, z2;
    z1->_mp_d = c;
    z2->_mp_d = z + t;
    z1->_mp_size = z1->_mp_alloc = z2->_mp_size = z2->_mp_alloc = t;
    mpz_set(z1, z2);
    */
  }
}
/* Define our own squaring function, which uses mpn_sqr_basecase for its
   allowed sizes, but its own code for larger sizes. */
static void
mpn_local_sqr (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr tp)
{
  mp_size_t i;

  ASSERT (n >= 1);
  ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));

  if (BELOW_THRESHOLD (n, SQR_BASECASE_LIM))
    {
      mpn_sqr_basecase (rp, up, n);
      return;
    }

  {
    mp_limb_t ul, lpl;
    ul = up[0];
    umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
    rp[0] = lpl >> GMP_NAIL_BITS;
  }

  if (n > 1)
    {
      mp_limb_t cy;

      cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
      tp[n - 1] = cy;
      for (i = 2; i < n; i++)
        {
          mp_limb_t cy;
          cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
          tp[n + i - 2] = cy;
        }

      MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);

      {
        mp_limb_t cy;
#if HAVE_NATIVE_mpn_addlsh1_n
        cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
#else
        cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
        cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
#endif
        rp[2 * n - 1] += cy;
      }
    }
}
// Montgomery reduction.
// Algorithm II.4 from Blake, Seroussi and Smart.
static void mont_reduce(mp_limb_t *x, mp_limb_t *y, fptr p) {
  size_t t = p->limbs;
  size_t i;
  mp_limb_t flag = 0;

  for (i = 0; i < t; i++) {
    mp_limb_t u = y[i] * p->negpinv;
    mp_limb_t carry = mpn_addmul_1(&y[i], p->primelimbs, t, u);
    //mpn_add_1(&y[i+t], &y[i+t], t - i + 1, carry);
    flag += mpn_add_1(&y[i + t], &y[i + t], t - i, carry);
  }

  if (flag || mpn_cmp(&y[t], p->primelimbs, t) >= 0) {
    mpn_sub_n(x, &y[t], p->primelimbs, t);
  } else {
    // TODO: GMP set might be faster.
    memcpy(x, &y[t], t * sizeof(mp_limb_t));
  }
}
/* Set cp[] <- tp[]/R^n mod mp[].  Clobber tp[].
   mp[] is n limbs; tp[] is 2n limbs. */
void
mpn_redc_1 (mp_ptr cp, mp_ptr tp, mp_srcptr mp, mp_size_t n, mp_limb_t Nprim)
{
  mp_limb_t cy;
  mp_limb_t q;
  mp_size_t j;

  ASSERT_MPN (tp, 2*n);

  for (j = 0; j < n; j++)
    {
      q = (tp[0] * Nprim) & GMP_NUMB_MASK;
      tp[0] = mpn_addmul_1 (tp, mp, n, q);
      tp++;
    }
  cy = mpn_add_n (cp, tp, tp - n, n);
  if (cy != 0)
    mpn_sub_n (cp, cp, mp, n);
}
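/* Usage sketch for mpn_redc_1 above -- illustrative only: the helper
   binvert_limb_simple is our own (GMP's binvert_limb is internal), the
   modulus and inputs are made up, a nail-free build is assumed, and
   <gmp.h> provides the public mpn calls.  Montgomery multiplication
   computes redc(a*b) = a*b*B^{-n} mod m for odd m. */
#include <gmp.h>

static mp_limb_t
binvert_limb_simple (mp_limb_t m)
{
  mp_limb_t inv = m;            /* m * m == 1 (mod 8), so 3 bits correct */
  int i;
  for (i = 0; i < 6; i++)       /* each Newton step doubles the correct bits */
    inv *= 2 - m * inv;
  return inv;                   /* inv * m == 1 (mod B) */
}

static void
demo_redc_1 (void)
{
  mp_limb_t m[2] = { 0x12345679, 7 };   /* arbitrary odd 2-limb modulus */
  mp_limb_t a[2] = { 123456, 0 };       /* inputs already reduced mod m */
  mp_limb_t b[2] = { 654321, 0 };
  mp_limb_t t[4], c[2];
  mp_limb_t Nprim = -binvert_limb_simple (m[0]);   /* -1/m mod B */

  mpn_mul_n (t, a, b, 2);       /* t = a*b, 2n limbs, clobbered by redc */
  mpn_redc_1 (c, t, m, 2, Nprim);
  /* Now c == a*b*B^{-2} mod m; multiplying by B^2 mod m (e.g. with mpz
     arithmetic) recovers a*b mod m. */
}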
void
mpn_redc_1_sec (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n,
                mp_limb_t invm)
{
  mp_size_t j;
  mp_limb_t cy;

  ASSERT (n > 0);
  ASSERT_MPN (up, 2*n);

  for (j = n - 1; j >= 0; j--)
    {
      cy = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
      ASSERT (up[0] == 0);
      up[0] = cy;
      up++;
    }
  cy = mpn_add_n (rp, up, up - n, n);
  mpn_subcnd_n (rp, rp, mp, n, cy);
}
void
mpn_mullow_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
  if (BELOW_THRESHOLD (n, MULLOW_BASECASE_THRESHOLD))
    {
      /* Allocate workspace of fixed size on stack: fast! */
      mp_limb_t ws[MUL_BASECASE_ALLOC];
      mpn_mul_basecase (ws, xp, n, yp, n);
      MPN_COPY (rp, ws, n);
    }
  else if (BELOW_THRESHOLD (n, MULLOW_DC_THRESHOLD))
    {
      mpn_mullow_basecase (rp, xp, yp, n);
    }
  else if (BELOW_THRESHOLD (n, MULLOW_MUL_N_THRESHOLD))
    {
      /* Divide-and-conquer */
      mp_size_t n2 = n >> 1;            /* floor(n/2) */
      mp_size_t n1 = n - n2;            /* ceil(n/2) */
      mp_ptr tp;
      TMP_SDECL;
      TMP_SMARK;
      tp = TMP_SALLOC_LIMBS (n1);

      /* Split as x = x1 2^(n1 GMP_NUMB_BITS) + x0,
                  y = y1 2^(n2 GMP_NUMB_BITS) + y0 */

      /* x0 * y0 */
      mpn_mul_n (rp, xp, yp, n2);
      if (n1 != n2)
        rp[2 * n2] = mpn_addmul_1 (rp + n2, yp, n2, xp[n2]);

      /* x1 * y0 * 2^(n1 GMP_NUMB_BITS) */
      mpn_mullow_n (tp, xp + n1, yp, n2);
      mpn_add_n (rp + n1, rp + n1, tp, n2);

      /* x0 * y1 * 2^(n2 GMP_NUMB_BITS) */
      mpn_mullow_n (tp, yp + n2, xp, n1);
      mpn_add_n (rp + n2, rp + n2, tp, n1);
      TMP_SFREE;
    }
  else
    {
      /* For really large operands, compute the full product and throw
         away the upper n limbs of the result. */
      mp_ptr tp;
      TMP_DECL;
      TMP_MARK;
      tp = TMP_ALLOC_LIMBS (2 * n);

      mpn_mul_n (tp, xp, yp, n);
      MPN_COPY (rp, tp, n);
      TMP_FREE;
    }
}
void
impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
{
  mp_size_t i;
  mp_limb_t cy_limb;
  mp_limb_t v_limb;

  /* Multiply by the first limb in V separately, as the result can be
     stored (not added) to PROD.  We also avoid a loop for zeroing. */
  v_limb = vp[0];
  if (v_limb <= 1)
    {
      if (v_limb == 1)
        MPN_COPY (prodp, up, size);
      else
        MPN_ZERO (prodp, size);
      cy_limb = 0;
    }
  else
    cy_limb = mpn_mul_1 (prodp, up, size, v_limb);

  prodp[size] = cy_limb;
  prodp++;

  /* For each iteration in the outer loop, multiply one limb from
     U with one limb from V, and add it to PROD. */
  for (i = 1; i < size; i++)
    {
      v_limb = vp[i];
      if (v_limb <= 1)
        {
          cy_limb = 0;
          if (v_limb == 1)
            cy_limb = mpn_add_n (prodp, prodp, up, size);
        }
      else
        cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);

      prodp[size] = cy_limb;
      prodp++;
    }
}
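/* A small check for impn_mul_n_basecase -- a sketch with made-up operands,
   assuming <gmp.h> and <assert.h>: the values include 0 and 1 limbs in vp
   to exercise the MPN_ZERO/MPN_COPY and mpn_add_n special cases, and the
   result must match the public mpn_mul_n. */
#include <assert.h>
#include <gmp.h>

static void
check_mul_n_basecase (void)
{
  mp_limb_t u[3] = { 0, 1, 0xbeef };
  mp_limb_t v[3] = { 1, 0, 0xcafe };    /* hits the v_limb <= 1 paths */
  mp_limb_t got[6], want[6];

  impn_mul_n_basecase (got, u, v, 3);
  mpn_mul_n (want, u, v, 3);
  assert (mpn_cmp (got, want, 6) == 0);
}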
void
_nmod_poly_div_basecase_1(mp_ptr Q, mp_ptr W, mp_srcptr A, long A_len,
                          mp_srcptr B, long B_len, nmod_t mod)
{
    mp_limb_t lead_inv = n_invmod(B[B_len - 1], mod.n);
    long len, coeff = A_len - B_len;
    mp_ptr R1 = W;
    mp_srcptr Btop = B + B_len - 1;

    mpn_copyi(R1, A + B_len - 1, A_len - B_len + 1);

    while (coeff >= 0)
    {
        R1[coeff] = n_mod2_preinv(R1[coeff], mod.n, mod.ninv);

        while (coeff >= 0 && R1[coeff] == 0L)
        {
            Q[coeff--] = 0L;
            if (coeff >= 0)
                R1[coeff] = n_mod2_preinv(R1[coeff], mod.n, mod.ninv);
        }

        if (coeff >= 0)
        {
            mp_limb_t c, *R_sub;

            Q[coeff] = n_mulmod2_preinv(R1[coeff], lead_inv, mod.n, mod.ninv);
            c = n_negmod(Q[coeff], mod.n);

            len = FLINT_MIN(B_len - 1, coeff);
            R_sub = R1 + coeff - len;
            if (len > 0)
                mpn_addmul_1(R_sub, Btop - len, len, c);

            coeff--;
        }
    }
}
void
_nmod_poly_rem_basecase_3(mp_ptr R, mp_ptr W, mp_srcptr A, long lenA,
                          mp_srcptr B, long lenB, nmod_t mod)
{
    if (lenB > 1)
    {
        const mp_limb_t invL = n_invmod(B[lenB - 1], mod.n);
        long iR, i;
        mp_ptr B3 = W, R3 = W + 3*(lenB - 1);

        for (i = 0; i < lenB - 1; i++)
        {
            B3[3 * i] = B[i];
            B3[3 * i + 1] = 0;
            B3[3 * i + 2] = 0;
        }
        for (i = 0; i < lenA; i++)
        {
            R3[3 * i] = A[i];
            R3[3 * i + 1] = 0;
            R3[3 * i + 2] = 0;
        }

        for (iR = lenA - 1; iR >= lenB - 1; iR--)
        {
            const mp_limb_t r = n_lll_mod_preinv(R3[3*iR + 2], R3[3*iR + 1],
                                                 R3[3*iR], mod.n, mod.ninv);

            if (r != 0)
            {
                const mp_limb_t q = n_mulmod2_preinv(r, invL, mod.n, mod.ninv);
                const mp_limb_t c = n_negmod(q, mod.n);

                mpn_addmul_1(R3 + 3 * (iR - lenB + 1), B3, 3 * lenB - 3, c);
            }
        }

        for (iR = 0; iR < lenB - 1; iR++)
            R[iR] = n_lll_mod_preinv(R3[3 * iR + 2], R3[3 * iR + 1],
                                     R3[3 * iR], mod.n, mod.ninv);
    }
}