/* Reference modular inversion built on mpn_gcdext.

   On success the cofactor of {ap, mn} modulo {mp, mn} is written to
   {rp, mn} (zero-padded, and made non-negative by subtracting from the
   modulus when mpn_gcdext reports a negative cofactor) and 1 is returned.
   If the gcd is not exactly 1, returns 0 and leaves rp untouched. */
static int
ref_modinv (mp_limb_t *rp, const mp_limb_t *ap, const mp_limb_t *mp, mp_size_t mn)
{
  const mp_size_t stride = mn + 1;
  /* Four areas of mn+1 limbs each: two operand copies, gcd, cofactor. */
  mp_limb_t scratch[4 * stride];
  mp_limb_t *u = scratch;
  mp_limb_t *v = u + stride;
  mp_limb_t *g = v + stride;
  mp_limb_t *cofactor = g + stride;
  mp_size_t cofactor_size;
  mp_size_t gcd_size;

  /* Hand private copies of both operands to mpn_gcdext. */
  mpn_copyi (u, ap, mn);
  mpn_copyi (v, mp, mn);
  gcd_size = mpn_gcdext (g, cofactor, &cofactor_size, u, mn, v, mn);

  if (!(gcd_size == 1 && g[0] == 1))
    return 0;

  if (cofactor_size < 0)
    {
      /* Negative cofactor: replace it by m - |cofactor|. */
      mpn_sub (cofactor, mp, mn, cofactor, -cofactor_size);
    }
  else if (cofactor_size < mn)
    {
      /* Zero-pad up to the full mn limbs. */
      mpn_zero (cofactor + cofactor_size, mn - cofactor_size);
    }

  mpn_copyi (rp, cofactor, mn);
  return 1;
}
/* Multiply {n, nl} by {m, ml} using mpn_mulmod_Bexpp1 on pl-limb operands,
   storing the result at op.

   Each input is copied into a temporary of pl + 1 limbs and zero-extended,
   so nl and ml must not exceed pl + 1.  A further 2*pl limbs of temporary
   space are handed to mpn_mulmod_Bexpp1.  The value that routine returns is
   passed back to the caller (converted to int). */
int
mpn_mulmod_Bexpp1_fft (mp_ptr op, mp_size_t pl,
                       mp_srcptr n, mp_size_t nl,
                       mp_srcptr m, mp_size_t ml)
{
  mp_ptr scratch, lhs, rhs;
  mp_limb_t carry;
  TMP_DECL;

  TMP_MARK;

  /* Workspace for the multiplication itself. */
  scratch = TMP_ALLOC_LIMBS(2*pl);

  /* First operand, padded with zero limbs up to pl + 1 limbs. */
  lhs = TMP_ALLOC_LIMBS(pl + 1);
  mpn_copyi(lhs, n, nl);
  MPN_ZERO(lhs + nl, pl + 1 - nl);

  /* Second operand, padded the same way. */
  rhs = TMP_ALLOC_LIMBS(pl + 1);
  mpn_copyi(rhs, m, ml);
  MPN_ZERO(rhs + ml, pl + 1 - ml);

  /* Only the product is of interest here; the operand size is
     pl*GMP_LIMB_BITS bits. */
  carry = mpn_mulmod_Bexpp1(op, lhs, rhs, pl, scratch);

  TMP_FREE;

  return carry;
}
/* Modular inversion modulo ecc->p.m by means of mpn_gcdext.

   Reads ecc->p.size limbs from ap and, on success, writes the same number
   of limbs to rp and returns 1.  Returns 0 without touching rp when the
   gcd of the input and the modulus is not exactly 1.

   tp is caller-provided scratch; four consecutive areas of
   ecc->p.size + 1 limbs each are carved out of it. */
static int
modinv_gcd (const struct ecc_curve *ecc,
            mp_limb_t *rp, mp_limb_t *ap, mp_limb_t *tp)
{
  const mp_size_t n = ecc->p.size;
  const mp_size_t stride = n + 1;
  mp_limb_t *u = tp;
  mp_limb_t *v = u + stride;
  mp_limb_t *g = v + stride;
  mp_limb_t *cofactor = g + stride;
  mp_size_t cofactor_size;
  mp_size_t gcd_size;

  /* Operate on copies placed in the scratch area. */
  mpn_copyi (u, ap, n);
  mpn_copyi (v, ecc->p.m, n);
  gcd_size = mpn_gcdext (g, cofactor, &cofactor_size, u, n, v, n);

  if (!(gcd_size == 1 && g[0] == 1))
    return 0;

  if (cofactor_size < 0)
    {
      /* Negative cofactor: replace it by p - |cofactor|. */
      mpn_sub (cofactor, ecc->p.m, n, cofactor, -cofactor_size);
    }
  else if (cofactor_size < n)
    {
      /* Zero-pad. */
      mpn_zero (cofactor + cofactor_size, n - cofactor_size);
    }

  mpn_copyi (rp, cofactor, n);
  return 1;
}
static void test_one(const char *name, const struct ecc_modulo *m, const mpz_t r) { mp_limb_t a[MAX_SIZE]; mp_limb_t t[MAX_SIZE]; mp_limb_t ref[MAX_SIZE]; mpz_limbs_copy (a, r, 2*m->size); ref_mod (ref, a, m->m, m->size); mpn_copyi (t, a, 2*m->size); m->mod (m, t); if (mpn_cmp (t, m->m, m->size) >= 0) mpn_sub_n (t, t, m->m, m->size); if (mpn_cmp (t, ref, m->size)) { fprintf (stderr, "m->mod %s failed: bit_size = %u\n", name, m->bit_size); fprintf (stderr, "a = "); mpn_out_str (stderr, 16, a, 2*m->size); fprintf (stderr, "\nt = "); mpn_out_str (stderr, 16, t, m->size); fprintf (stderr, " (bad)\nref = "); mpn_out_str (stderr, 16, ref, m->size); fprintf (stderr, "\n"); abort (); } if (m->B_size < m->size) { mpn_copyi (t, a, 2*m->size); ecc_mod (m, t); if (mpn_cmp (t, m->m, m->size) >= 0) mpn_sub_n (t, t, m->m, m->size); if (mpn_cmp (t, ref, m->size)) { fprintf (stderr, "ecc_mod %s failed: bit_size = %u\n", name, m->bit_size); fprintf (stderr, "a = "); mpn_out_str (stderr, 16, a, 2*m->size); fprintf (stderr, "\nt = "); mpn_out_str (stderr, 16, t, m->size); fprintf (stderr, " (bad)\nref = "); mpn_out_str (stderr, 16, ref, m->size); fprintf (stderr, "\n"); abort (); } } }
/* Compute r from i1 using two power-of-two multiplications and a final
   subtraction.  r, i1 and temp are each accessed as limbs + 1 limbs
   (index [limbs] is read or written on all three).

   With wn = limbs*FLINT_BITS, j = i/2 and k = w/2:
     1. b1 = j + wn/4 + i*k.  If b1 >= wn it is reduced by wn and the
        `negate' flag is set.
     2. r = i1 * 2^b1.  The whole-limb part of the shift (y = b1/FLINT_BITS)
        is done by hand in temp: the low limbs are moved up by y, the limbs
        that wrap around are negated, the top limb i1[limbs] is folded in
        and the borrow is propagated.  The remaining b1 % FLINT_BITS bits
        are handled by mpn_mul_2expmod_2expp1.
     3. temp = r * 2^(wn/2), built with the same limb rotation for
        y = limbs/2, plus an extra half-limb shift when limbs is odd.
     4. r = r - temp if `negate' is set, otherwise r = temp - r.

   NOTE(review): the helper names suggest all values are residues modulo
   2^wn + 1; that is not verifiable from this function alone. */
void fft_adjust_sqrt2(mp_limb_t * r, mp_limb_t * i1, mp_size_t i, mp_size_t limbs, mp_bitcnt_t w, mp_limb_t * temp) { mp_bitcnt_t wn = limbs*FLINT_BITS; mp_limb_t cy; mp_size_t j = i/2, k = w/2; mp_size_t y; mp_bitcnt_t b1; int negate = 0; b1 = j + wn/4 + i*k; if (b1 >= wn) { negate = 1; b1 -= wn; } y = b1/FLINT_BITS; b1 = b1%FLINT_BITS; /* multiply by 2^{j + wn/4 + i*k} */ if (y) { mpn_copyi(temp + y, i1, limbs - y); cy = mpn_neg_n(temp, i1 + limbs - y, y); temp[limbs] = 0; mpn_addmod_2expp1_1(temp + y, limbs - y, -i1[limbs]); mpn_sub_1(temp + y, temp + y, limbs - y + 1, cy); mpn_mul_2expmod_2expp1(r, temp, limbs, b1); } else mpn_mul_2expmod_2expp1(r, i1, limbs, b1); /* multiply by 2^{wn/2} */ y = limbs/2; cy = 0; mpn_copyi(temp + y, r, limbs - y); temp[limbs] = 0; if (y) cy = mpn_neg_n(temp, r + limbs - y, y); mpn_addmod_2expp1_1(temp + y, limbs - y, -r[limbs]); mpn_sub_1(temp + y, temp + y, limbs - y + 1, cy); /* shift by an additional half limb (rare) */ if (limbs & 1) mpn_mul_2expmod_2expp1(temp, temp, limbs, FLINT_BITS/2); /* subtract */ if (negate) mpn_sub_n(r, r, temp, limbs + 1); else mpn_sub_n(r, temp, r, limbs + 1); }
static void bench_modinv_gcd (void *p) { struct ecc_ctx *ctx = (struct ecc_ctx *) p; mpn_copyi (ctx->rp + ctx->ecc->p.size, ctx->ap, ctx->ecc->p.size); modinv_gcd (ctx->ecc, ctx->rp, ctx->rp + ctx->ecc->p.size, ctx->tp); }
static void bench_modq (void *p) { struct ecc_ctx *ctx = (struct ecc_ctx *) p; mpn_copyi (ctx->rp, ctx->ap, 2*ctx->ecc->p.size); ctx->ecc->q.mod(&ctx->ecc->q, ctx->rp); }
static void bench_reduce (void *p) { struct ecc_ctx *ctx = (struct ecc_ctx *) p; mpn_copyi (ctx->rp, ctx->ap, 2*ctx->ecc->p.size); ctx->ecc->p.reduce (&ctx->ecc->p, ctx->rp); }
// Compute into ans the inverse of x with respect to the modulus ZpD.prA,
// both of ZpD.t limbs, using mpn_gcdext on local copies of the operands.
// The cofactor is written straight into ans.x; a negative cofactor is
// replaced by prA - |cofactor|, a short one is zero-padded to ZpD.t limbs.
// When ZpD.montgomery is set the result is additionally multiplied by
// ZpD.R3 via ZpD.Mont_Mult.
// The gcd itself is computed but never inspected, so the caller is
// responsible for passing an invertible x.
void Inv(modp& ans,const modp& x,const Zp_Data& ZpD)
{
  mp_limb_t gcd[MAX_MOD_SZ], operand[MAX_MOD_SZ], modulus[MAX_MOD_SZ];
  mp_size_t cofactor_len;

  mpn_copyi(operand, x.x, ZpD.t);
  mpn_copyi(modulus, ZpD.prA, ZpD.t);
  mpn_gcdext(gcd, ans.x, &cofactor_len, operand, ZpD.t, modulus, ZpD.t);

  if (cofactor_len >= 0)
    {
      // Clear the limbs above the cofactor.
      int i = cofactor_len;
      while (i < ZpD.t)
        {
          ans.x[i] = 0;
          i++;
        }
    }
  else
    {
      // Negative cofactor: map it into the range [0, prA).
      mpn_sub(ans.x, ZpD.prA, ZpD.t, ans.x, -cofactor_len);
    }

  if (ZpD.montgomery)
    {
      ZpD.Mont_Mult(ans.x, ans.x, ZpD.R3);
    }
}
/* Basecase remainder of (A, lenA) by (B, lenB), written to R as
   lenB - 1 coefficients.  W is scratch space into which lenA limbs of A
   are copied.

   Products are accumulated with mpn_addmul_1 without intermediate
   reduction; only the final lenB - 1 coefficients are reduced with
   _nmod_vec_reduce.  Nothing is written when lenB <= 1. */
void _nmod_poly_rem_basecase_1(mp_ptr R, mp_ptr W, mp_srcptr A, long lenA,
                               mp_srcptr B, long lenB, nmod_t mod)
{
    mp_limb_t lead_inv;
    mp_ptr rem = W;
    long top;

    if (lenB <= 1)
        return;

    lead_inv = n_invmod(B[lenB - 1], mod.n);
    mpn_copyi(rem, A, lenA);

    for (top = lenA - 1; top >= lenB - 1; top--)
    {
        mp_limb_t quot, neg_quot;

        if (rem[top] == 0)
            continue;

        /* Quotient coefficient, negated so the update below is an
           addition. */
        quot = n_mulmod2_preinv(rem[top], lead_inv, mod.n, mod.ninv);
        neg_quot = n_negmod(quot, mod.n);

        mpn_addmul_1(rem + top - lenB + 1, B, lenB - 1, neg_quot);
    }

    _nmod_vec_reduce(R, rem, lenB - 1, mod);
}
// Set x to the representation of one: a copy of ZpD.R (ZpD.t limbs) when
// ZpD.montgomery is set, otherwise zero with the lowest limb set to 1.
void assignOne(modp& x,const Zp_Data& ZpD)
{
  if (!ZpD.montgomery)
    {
      assignZero(x,ZpD);
      x.x[0]=1;
      return;
    }

  mpn_copyi(x.x,ZpD.R,ZpD.t);
}
/* Set g to the length-n series computed by _nmod_poly_cosh_series from h.

   Aborts with a message if h has a non-zero constant term.  When h has
   length 1 or n < 2 the result is set directly: zero, with coefficient 0
   set to 1 if n > 0.  Otherwise h is zero-padded to n coefficients in a
   temporary vector if it is shorter than n, and a temporary polynomial is
   used for the output when g and h are the same object and h is not
   padded. */
void nmod_poly_cosh_series(nmod_poly_t g, const nmod_poly_t h, long n)
{
    const long h_len = h->length;
    mp_ptr in, out;
    nmod_poly_t tmp;
    int padded, aliased;

    if (h_len > 0 && h->coeffs[0] != 0UL)
    {
        printf("Exception: nmod_poly_cosh_series: constant term != 0\n");
        abort();
    }

    if (h_len == 1 || n < 2)
    {
        nmod_poly_zero(g);
        if (n > 0)
            nmod_poly_set_coeff_ui(g, 0, 1UL);
        return;
    }

    padded = (h_len < n);
    aliased = (h == g && !padded);

    /* Input: either h's own coefficients or a zero-padded copy. */
    if (padded)
    {
        in = _nmod_vec_init(n);
        mpn_copyi(in, h->coeffs, h_len);
        mpn_zero(in + h_len, n - h_len);
    }
    else
    {
        in = h->coeffs;
    }

    /* Output: a temporary when writing to g would overwrite the input. */
    if (aliased)
    {
        nmod_poly_init2(tmp, h->mod.n, n);
        out = tmp->coeffs;
    }
    else
    {
        nmod_poly_fit_length(g, n);
        out = g->coeffs;
    }

    _nmod_poly_cosh_series(out, in, n, h->mod);

    if (aliased)
    {
        nmod_poly_swap(g, tmp);
        nmod_poly_clear(tmp);
    }

    g->length = n;

    if (padded)
        _nmod_vec_free(in);

    _nmod_poly_normalise(g);
}
/* Copy the limbs of X to {xp, n}, filling the unused high limbs with
   zeros.  Asserts that X occupies at most n limbs. */
void
mpz_limbs_copy (mp_limb_t *xp, mpz_srcptr x, mp_size_t n)
{
  const mp_size_t used = mpz_size (x);

  assert (used <= n);

  mpn_copyi (xp, mpz_limbs_read (x), used);

  if (n > used)
    mpn_zero (xp + used, n - used);
}
/* Set Qinv to the length-n series computed by
   _nmod_poly_revert_series_lagrange from Q.

   Aborts with a message unless Q has length at least 2, a zero constant
   term and a non-zero coefficient of x^1.  Q is zero-padded to n
   coefficients in a temporary vector if it is shorter than n, and a
   temporary polynomial is used for the output when Q and Qinv are the same
   object and Q is not padded. */
void nmod_poly_revert_series_lagrange(nmod_poly_t Qinv, const nmod_poly_t Q, long n)
{
    const long Qlen = Q->length;
    mp_ptr in, out;
    nmod_poly_t tmp;
    int padded, aliased;

    if (Qlen < 2 || Q->coeffs[0] != 0 || Q->coeffs[1] == 0)
    {
        printf("exception: nmod_poly_revert_series_lagrange: input must have "
               "zero constant and an invertible coefficient of x^1");
        abort();
    }

    padded = (Qlen < n);
    aliased = (Q == Qinv && !padded);

    /* Input: either Q's own coefficients or a zero-padded copy. */
    if (padded)
    {
        in = _nmod_vec_init(n);
        mpn_copyi(in, Q->coeffs, Qlen);
        mpn_zero(in + Qlen, n - Qlen);
    }
    else
    {
        in = Q->coeffs;
    }

    /* Output: a temporary when writing to Qinv would overwrite the input. */
    if (aliased)
    {
        nmod_poly_init2(tmp, Q->mod.n, n);
        out = tmp->coeffs;
    }
    else
    {
        nmod_poly_fit_length(Qinv, n);
        out = Qinv->coeffs;
    }

    _nmod_poly_revert_series_lagrange(out, in, n, Q->mod);

    if (aliased)
    {
        nmod_poly_swap(Qinv, tmp);
        nmod_poly_clear(tmp);
    }

    Qinv->length = n;

    if (padded)
        _nmod_vec_clear(in);

    _nmod_poly_normalise(Qinv);
}
/* Set Qinv to the length-n series computed by
   _nmod_poly_inv_series_basecase from Q.

   Aborts with a "division by zero" message when n is 0, Q is empty or the
   constant term of Q is 0.  Q is zero-padded to n coefficients in a
   temporary vector if it is shorter than n, and a temporary polynomial is
   used for the output when Q and Qinv are the same object and Q is not
   padded. */
void nmod_poly_inv_series_basecase(nmod_poly_t Qinv, const nmod_poly_t Q, long n)
{
    const long Qlen = Q->length;
    mp_ptr in, out;
    nmod_poly_t tmp;
    int padded, aliased;

    if (n == 0 || Q->length == 0 || Q->coeffs[0] == 0)
    {
        printf("Exception: division by zero in nmod_poly_inv_series_basecase\n");
        abort();
    }

    padded = (Qlen < n);
    aliased = (Q == Qinv && !padded);

    /* Input: either Q's own coefficients or a zero-padded copy. */
    if (padded)
    {
        in = _nmod_vec_init(n);
        mpn_copyi(in, Q->coeffs, Qlen);
        mpn_zero(in + Qlen, n - Qlen);
    }
    else
    {
        in = Q->coeffs;
    }

    /* Output: a temporary when writing to Qinv would overwrite the input. */
    if (aliased)
    {
        nmod_poly_init2(tmp, Q->mod.n, n);
        out = tmp->coeffs;
    }
    else
    {
        nmod_poly_fit_length(Qinv, n);
        out = Qinv->coeffs;
    }

    _nmod_poly_inv_series_basecase(out, in, n, Q->mod);

    if (aliased)
    {
        nmod_poly_swap(Qinv, tmp);
        nmod_poly_clear(tmp);
    }

    Qinv->length = n;

    if (padded)
        _nmod_vec_free(in);

    _nmod_poly_normalise(Qinv);
}
/* Multiply {ap, an} by {bp, bn} through mpn_mulmod_Bexpp1 on rn-limb
   operands.

   Both inputs are copied into temporaries of rn + 1 limbs and zero-padded,
   so an and bn must not exceed rn + 1.  The low rn limbs of the result are
   copied to rp and the extra top limb (index rn) is returned.  The
   parameter k is not used by this implementation. */
int mpn_mul_fft(mp_ptr rp, mp_size_t rn, mp_srcptr ap, mp_size_t an,
                mp_srcptr bp, mp_size_t bn, int k)
{
    mp_ptr prod, scratch, a_pad, b_pad;
    mp_size_t len = rn + 1;
    mp_size_t top;
    TMP_DECL;

    TMP_MARK;

    prod    = (mp_ptr)TMP_ALLOC_LIMBS(len);
    scratch = (mp_ptr)TMP_ALLOC_LIMBS(len);
    a_pad   = (mp_ptr)TMP_ALLOC_LIMBS(len);
    b_pad   = (mp_ptr)TMP_ALLOC_LIMBS(len);

    /* Zero-extend both operands to rn + 1 limbs. */
    mpn_copyi(a_pad, ap, an);
    mpn_zero(a_pad + an, len - an);

    mpn_copyi(b_pad, bp, bn);
    mpn_zero(b_pad + bn, len - bn);

    mpn_mulmod_Bexpp1(prod, a_pad, b_pad, rn, scratch);

    mpn_copyi(rp, prod, rn);
    top = prod[rn];

    TMP_FREE;

    return top;
}
/* Compute the signature pair (rp, sp) over DIGEST (LENGTH bytes) on curve
   ECC, following the RFC 6090 "KT-I" procedure quoted in the body.

   zp      multiplier of r in step 4 (the "z" of the procedure);
           presumably the private key -- confirm against callers.
   kp      the nonce k; must be invertible modulo the group order.
   rp, sp  outputs; neither is checked for zero here (see NOTE below).
   scratch work area shared by the overlapping regions P/kinv, hp and tp
           defined by the macros at the top of the body.

   Order of operations: k*g via ecc->mul_g, conversion to the x coordinate
   mod q into rp via ecc->h_to_a, inversion of k via ecc->q.invert (which,
   per the inline note, clobbers hp), hashing of the digest into hp, and
   finally sp = (hp + zp*rp) * kinv using the ecc_modq_* helpers.  The hash
   is written only after the inversion because of that clobbering. */
/* NOTE: Caller should check if r or s is zero. */ void ecc_ecdsa_sign (const struct ecc_curve *ecc, const mp_limb_t *zp, /* Random nonce, must be invertible mod ecc group order. */ const mp_limb_t *kp, size_t length, const uint8_t *digest, mp_limb_t *rp, mp_limb_t *sp, mp_limb_t *scratch) { #define P scratch #define kinv scratch /* Needs 5*ecc->p.size for computation */ #define hp (scratch + ecc->p.size) /* NOTE: ecc->p.size + 1 limbs! */ #define tp (scratch + 2*ecc->p.size) /* Procedure, according to RFC 6090, "KT-I". q denotes the group order. 1. k <-- uniformly random, 0 < k < q 2. R <-- (r_x, r_y) = k g 3. s1 <-- r_x mod q 4. s2 <-- (h + z*s1)/k mod q. */ ecc->mul_g (ecc, P, kp, P + 3*ecc->p.size); /* x coordinate only, modulo q */ ecc->h_to_a (ecc, 2, rp, P, P + 3*ecc->p.size); /* Invert k, uses 4 * ecc->p.size including scratch */ ecc->q.invert (&ecc->q, kinv, kp, tp); /* NOTE: Also clobbers hp */ /* Process hash digest */ ecc_hash (&ecc->q, hp, length, digest); ecc_modq_mul (ecc, tp, zp, rp); ecc_modq_add (ecc, hp, hp, tp); ecc_modq_mul (ecc, tp, hp, kinv); mpn_copyi (sp, tp, ecc->p.size); #undef P #undef hp #undef kinv #undef tp }
/* Basecase quotient of (A, A_len) by (B, B_len), written to Q as
   A_len - B_len + 1 coefficients.  W is scratch space that receives the
   top A_len - B_len + 1 coefficients of A.

   Coefficients are processed from the top down.  Each one is reduced
   modulo mod.n just before it is used; a zero coefficient yields a zero
   quotient coefficient, otherwise the quotient coefficient is obtained by
   multiplying with the inverse of B's leading coefficient, and the
   matching multiple of the relevant top part of B is cancelled from the
   lower working coefficients with mpn_addmul_1 (without intermediate
   reduction). */
void _nmod_poly_div_basecase_1(mp_ptr Q, mp_ptr W, mp_srcptr A, long A_len,
                               mp_srcptr B, long B_len, nmod_t mod)
{
    const mp_limb_t lead_inv = n_invmod(B[B_len - 1], mod.n);
    mp_srcptr B_top = B + B_len - 1;
    mp_ptr rem = W;
    long pos;

    mpn_copyi(rem, A + B_len - 1, A_len - B_len + 1);

    for (pos = A_len - B_len; pos >= 0; pos--)
    {
        mp_limb_t neg_quot;
        long span;

        rem[pos] = n_mod2_preinv(rem[pos], mod.n, mod.ninv);

        if (rem[pos] == 0L)
        {
            Q[pos] = 0L;
            continue;
        }

        Q[pos] = n_mulmod2_preinv(rem[pos], lead_inv, mod.n, mod.ninv);
        neg_quot = n_negmod(Q[pos], mod.n);

        /* Only the `span' coefficients below pos are still needed. */
        span = FLINT_MIN(B_len - 1, pos);
        if (span > 0)
            mpn_addmul_1(rem + pos - span, B_top - span, span, neg_quot);
    }
}
/* Set r to i1 multiplied by 2^(i*w).  r and i1 are each accessed as
   limbs + 1 limbs (index [limbs] is used on both).

   The shift of b1 = i*w bits is split into x = b1/GMP_LIMB_BITS whole limbs
   and b1 % GMP_LIMB_BITS remaining bits.  When x is non-zero the whole-limb
   part is done by hand: the low limbs - x limbs of i1 are moved up by x,
   the x limbs that wrap around are negated into the bottom of r, the top
   limb i1[limbs] is folded in and the borrow from the negation is
   propagated.  The remaining bits are always handled by
   mpn_mul_2expmod_2expp1.

   NOTE(review): the helper names suggest the values are residues modulo
   2^(limbs*GMP_LIMB_BITS) + 1; that is not verifiable from this function
   alone.  The x != 0 path reads i1 after writing r, so presumably r and i1
   must not overlap -- confirm against callers. */
void mpir_fft_adjust(mp_ptr r, mp_ptr i1, mp_size_t i, mp_size_t limbs, mp_bitcnt_t w) { mp_bitcnt_t b1; mp_limb_t cy; mp_size_t x; b1 = i*w; x = b1/GMP_LIMB_BITS; b1 = b1%GMP_LIMB_BITS; if (x) { mpn_copyi(r + x, i1, limbs - x); r[limbs] = 0; cy = mpn_neg_n(r, i1 + limbs - x, x); mpn_addmod_2expp1_1(r + x, limbs - x, -i1[limbs]); mpn_sub_1(r + x, r + x, limbs - x + 1, cy); mpn_mul_2expmod_2expp1(r, r, limbs, b1); } else mpn_mul_2expmod_2expp1(r, i1, limbs, b1); }
/*
   Divide (arrayg, limbsg) by the positive value gc in place and return
   the number of limbs written.

   A single-limb gc is handled directly with mpn_divmod_1.  Otherwise the
   dividend is first copied to a heap temporary, because mpn_tdiv_q is
   given distinct source and destination areas, and the quotient is
   written back over arrayg.  In both cases the returned count drops the
   top limb when it is zero.
*/
mp_size_t mpn_tdiv_q_fmpz_inplace(mp_ptr arrayg, mp_size_t limbsg, fmpz_t gc)
{
    __mpz_struct * divisor;
    mp_ptr dividend;
    mp_size_t qlimbs;

    if (fmpz_size(gc) == 1)
    {
        mpn_divmod_1(arrayg, arrayg, limbsg, fmpz_get_ui(gc));
        return (arrayg[limbsg - 1] == 0) ? limbsg - 1 : limbsg;
    }

    divisor = COEFF_TO_PTR(*gc);
    dividend = flint_malloc(limbsg*sizeof(mp_limb_t));

    mpn_copyi(dividend, arrayg, limbsg);
    mpn_tdiv_q(arrayg, dividend, limbsg, divisor->_mp_d, divisor->_mp_size);

    qlimbs = limbsg - divisor->_mp_size + 1;
    if (arrayg[qlimbs - 1] == 0)
        qlimbs--;

    flint_free(dividend);

    return qlimbs;
}
/* Set g to the length-n series computed by _nmod_poly_tanh_series from h.

   Aborts with a message if h has a non-zero constant term.  When h has
   length 1 or n < 2 the result is zero.  Otherwise g is grown to n
   coefficients and h is zero-padded to n coefficients in a temporary
   vector if it is shorter than n.  No temporary is used for the output, so
   when g and h are the same object with at least n coefficients the
   underlying routine is called with identical input and output pointers. */
void nmod_poly_tanh_series(nmod_poly_t g, const nmod_poly_t h, long n)
{
    const long h_len = h->length;
    mp_ptr in;
    int padded;

    if (h_len > 0 && h->coeffs[0] != 0UL)
    {
        printf("Exception: nmod_poly_tanh_series: constant term != 0\n");
        abort();
    }

    if (h_len == 1 || n < 2)
    {
        nmod_poly_zero(g);
        return;
    }

    nmod_poly_fit_length(g, n);

    padded = (h_len < n);
    if (padded)
    {
        in = _nmod_vec_init(n);
        mpn_copyi(in, h->coeffs, h_len);
        mpn_zero(in + h_len, n - h_len);
    }
    else
    {
        in = h->coeffs;
    }

    _nmod_poly_tanh_series(g->coeffs, in, n, h->mod);

    if (padded)
        _nmod_vec_free(in);

    g->length = n;
    _nmod_poly_normalise(g);
}
/* Divide-and-conquer quotient of (A, lenA) by (B, lenB), written to Q.
   All scratch space is allocated and released inside the function.

   The work is dispatched on how lenA compares with 2*lenB - 1:
     - lenA < 2*lenB - 1: both operands are shifted by n2 = lenB - n1 with
       n1 = lenA - lenB + 1, giving a (2*n1 - 1) by n1 division that is
       handed to _nmod_poly_div_divconquer_recursive.
     - lenA > 2*lenB - 1: the top 2*lenB - 1 coefficients of A are divided
       first (producing the high part q1 of Q and the product d1q1), the
       partial remainder is assembled in the scratch area, and the function
       calls itself on the remaining lenA - lenB coefficients to obtain the
       low part q2 of Q.
     - lenA == 2*lenB - 1: a single call to
       _nmod_poly_div_divconquer_recursive.

   The scratch sizes come from NMOD_DIVREM_DC_ITCH; its definition is not
   visible here. */
void _nmod_poly_div_divconquer(mp_ptr Q, mp_srcptr A, long lenA, mp_srcptr B, long lenB, nmod_t mod) { if (lenA < 2 * lenB - 1) { /* Convert unbalanced division into a 2 n1 - 1 by n1 division */ const long n1 = lenA - lenB + 1; const long n2 = lenB - n1; mp_srcptr p1 = A + n2; mp_srcptr d1 = B + n2; mp_ptr V = _nmod_vec_init(n1 - 1 + NMOD_DIVREM_DC_ITCH(n1, mod)); mp_ptr W = V + NMOD_DIVREM_DC_ITCH(n1, mod); _nmod_poly_div_divconquer_recursive(Q, W, V, p1, d1, n1, mod); _nmod_vec_clear(V); } else if (lenA > 2 * lenB - 1) { /* We shift A right until it is of length 2 lenB - 1, call this p1 */ const long shift = lenA - 2 * lenB + 1; mp_srcptr p1 = A + shift; mp_ptr V = _nmod_vec_init(lenA + (3 * lenB - 2) + NMOD_DIVREM_DC_ITCH(lenB, mod)); mp_ptr R = V + NMOD_DIVREM_DC_ITCH(lenB, mod); mp_ptr W = R + lenB - 1; mp_ptr q1 = Q + shift; mp_ptr q2 = Q; mp_ptr dq1 = W; mp_ptr d1q1 = dq1 + shift; /* Set q1 to p1 div B, a 2 lenB - 1 by lenB division, so q1 ends up being of length lenB; set d1q1 = d1 * q1 of length 2 lenB - 1 */ _nmod_poly_divrem_divconquer_recursive(q1, d1q1, R, V, p1, B, lenB, mod); /* We have dq1 = d1 * q1 * x^shift, of length lenA Compute R = A - dq1; the first lenB coeffs represent remainder terms (zero if division is exact), leaving lenA - lenB significant terms which we use in the division */ mpn_copyi(dq1, A, shift); _nmod_vec_sub(dq1 + shift, A + shift, dq1 + shift, lenB - 1, mod); /* Compute q2 = trunc(R) div B; it is a smaller division than the original since len(trunc(R)) = lenA - lenB */ _nmod_poly_div_divconquer(q2, dq1, lenA - lenB, B, lenB, mod); /* We have Q = q1 * x^shift + q2; Q has length lenB + shift; note q2 has length shift since the above division is lenA - lenB by lenB */ _nmod_vec_clear(V); } else /* lenA = 2 * lenB - 1 */ { mp_ptr V = _nmod_vec_init(lenB - 1 + NMOD_DIVREM_DC_ITCH(lenB, mod)); mp_ptr W = V + NMOD_DIVREM_DC_ITCH(lenB, mod); _nmod_poly_div_divconquer_recursive(Q, W, V, A, B, lenB, mod); _nmod_vec_clear(V); } }
/* Set f to the length-n exponential series of h.

   Aborts with a message if h has a non-zero constant term.  The work is
   dispatched as follows:
     - n <= 1 or h empty: f becomes zero when n == 0, otherwise the
       constant 1;
     - h has a single relevant non-zero coefficient (within the first n):
       _nmod_poly_exp_series_monomial_ui;
     - n below NMOD_NEWTON_EXP_CUTOFF2: _nmod_poly_exp_series_basecase;
     - otherwise _nmod_poly_exp_series, with h zero-padded to n
       coefficients in a temporary vector if it is shorter than n and a
       temporary polynomial for the output when f and h are the same
       object and h is not padded. */
void nmod_poly_exp_series(nmod_poly_t f, const nmod_poly_t h, long n)
{
    mp_ptr in, out;
    nmod_poly_t tmp;
    long hlen, lead;
    int padded, aliased;

    nmod_poly_fit_length(f, n);

    hlen = h->length;

    if (hlen > 0 && h->coeffs[0] != 0UL)
    {
        printf("Exception: nmod_poly_exp_series: constant term != 0\n");
        abort();
    }

    if (n <= 1 || hlen == 0)
    {
        if (n != 0)
        {
            f->coeffs[0] = 1UL;
            f->length = 1;
        }
        else
        {
            nmod_poly_zero(f);
        }
        return;
    }

    /* Handle monomials: find the first non-zero coefficient, looking no
       further than index n - 1. */
    lead = 0;
    while (h->coeffs[lead] == 0UL && lead < n - 1)
        lead++;

    if (lead == hlen - 1 || lead == n - 1)
    {
        hlen = FLINT_MIN(hlen, n);
        _nmod_poly_exp_series_monomial_ui(f->coeffs,
            h->coeffs[hlen-1], hlen - 1, n, f->mod);
        f->length = n;
        _nmod_poly_normalise(f);
        return;
    }

    if (n < NMOD_NEWTON_EXP_CUTOFF2)
    {
        _nmod_poly_exp_series_basecase(f->coeffs, h->coeffs, hlen, n, f->mod);
        f->length = n;
        _nmod_poly_normalise(f);
        return;
    }

    padded = (hlen < n);
    aliased = (h == f && !padded);

    /* Input: either h's own coefficients or a zero-padded copy. */
    if (padded)
    {
        in = _nmod_vec_init(n);
        mpn_copyi(in, h->coeffs, hlen);
        mpn_zero(in + hlen, n - hlen);
    }
    else
    {
        in = h->coeffs;
    }

    /* Output: a temporary when writing to f would overwrite the input. */
    if (aliased)
    {
        nmod_poly_init2(tmp, h->mod.n, n);
        out = tmp->coeffs;
    }
    else
    {
        nmod_poly_fit_length(f, n);
        out = f->coeffs;
    }

    _nmod_poly_exp_series(out, in, n, f->mod);

    if (aliased)
    {
        nmod_poly_swap(f, tmp);
        nmod_poly_clear(tmp);
    }

    f->length = n;

    if (padded)
        _nmod_vec_free(in);

    _nmod_poly_normalise(f);
}
void check_functions (void) { mp_limb_t wp[2], wp2[2], xp[2], yp[2], r; int i; memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0] = 123; yp[0] = 456; mpn_add_n (wp, xp, yp, (mp_size_t) 1); ASSERT_ALWAYS (wp[0] == 579); } memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0] = 123; wp[0] = 456; r = mpn_addmul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(2)); ASSERT_ALWAYS (wp[0] == 702); ASSERT_ALWAYS (r == 0); } #if HAVE_NATIVE_mpn_copyd memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0] = 123; xp[1] = 456; mpn_copyd (xp+1, xp, (mp_size_t) 1); ASSERT_ALWAYS (xp[1] == 123); } #endif #if HAVE_NATIVE_mpn_copyi memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0] = 123; xp[1] = 456; mpn_copyi (xp, xp+1, (mp_size_t) 1); ASSERT_ALWAYS (xp[0] == 456); } #endif memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0] = 1605; mpn_divexact_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(5)); ASSERT_ALWAYS (wp[0] == 321); } memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0] = 1296; r = mpn_divexact_by3c (wp, xp, (mp_size_t) 1, CNST_LIMB(0)); ASSERT_ALWAYS (wp[0] == 432); ASSERT_ALWAYS (r == 0); } memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0] = 578; r = mpn_divexact_byfobm1 (wp, xp, (mp_size_t) 1, CNST_LIMB(17),CNST_LIMB(-1)/CNST_LIMB(17)); ASSERT_ALWAYS (wp[0] == 34); ASSERT_ALWAYS (r == 0); } memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0] = 287; r = mpn_divrem_1 (wp, (mp_size_t) 1, xp, (mp_size_t) 1, CNST_LIMB(7)); ASSERT_ALWAYS (wp[1] == 41); ASSERT_ALWAYS (wp[0] == 0); ASSERT_ALWAYS (r == 0); } memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0] = 290; r = mpn_divrem_euclidean_qr_1 
(wp, 0, xp, (mp_size_t) 1, CNST_LIMB(7)); ASSERT_ALWAYS (wp[0] == 41); ASSERT_ALWAYS (r == 3); } memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0] = 12; r = mpn_gcd_1 (xp, (mp_size_t) 1, CNST_LIMB(9)); ASSERT_ALWAYS (r == 3); } memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0] = 0x1001; mpn_lshift (wp, xp, (mp_size_t) 1, 1); ASSERT_ALWAYS (wp[0] == 0x2002); } memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0] = 14; r = mpn_mod_1 (xp, (mp_size_t) 1, CNST_LIMB(4)); ASSERT_ALWAYS (r == 2); } #if (GMP_NUMB_BITS % 4) == 0 memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { int bits = (GMP_NUMB_BITS / 4) * 3; mp_limb_t mod = (CNST_LIMB(1) << bits) - 1; mp_limb_t want = GMP_NUMB_MAX % mod; xp[0] = GMP_NUMB_MAX; r = mpn_mod_34lsub1 (xp, (mp_size_t) 1); ASSERT_ALWAYS (r % mod == want); } #endif // DECL_modexact_1c_odd ((*modexact_1c_odd)); memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0] = 14; r = mpn_mul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(4)); ASSERT_ALWAYS (wp[0] == 56); ASSERT_ALWAYS (r == 0); } memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0] = 5; yp[0] = 7; mpn_mul_basecase (wp, xp, (mp_size_t) 1, yp, (mp_size_t) 1); ASSERT_ALWAYS (wp[0] == 35); ASSERT_ALWAYS (wp[1] == 0); } #if HAVE_NATIVE_mpn_preinv_divrem_1 && GMP_NAIL_BITS == 0 memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0] = 0x101; r = mpn_preinv_divrem_1 (wp, (mp_size_t) 1, xp, (mp_size_t) 1, GMP_LIMB_HIGHBIT, refmpn_invert_limb (GMP_LIMB_HIGHBIT), 0); ASSERT_ALWAYS (wp[0] == 0x202); ASSERT_ALWAYS (wp[1] == 0); ASSERT_ALWAYS (r == 0); } #endif #if GMP_NAIL_BITS == 0 memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0] = GMP_LIMB_HIGHBIT+123; 
r = mpn_preinv_mod_1 (xp, (mp_size_t) 1, GMP_LIMB_HIGHBIT, refmpn_invert_limb (GMP_LIMB_HIGHBIT)); ASSERT_ALWAYS (r == 123); } #endif memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0] = 5; modlimb_invert(r,xp[0]); r=-r; yp[0]=43; yp[1]=75; mpn_redc_1 (wp, yp, xp, (mp_size_t) 1,r); ASSERT_ALWAYS (wp[0] == 78); } memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0]=5; yp[0]=3; mpn_sumdiff_n (wp, wp2,xp, yp,1); ASSERT_ALWAYS (wp[0] == 8); ASSERT_ALWAYS (wp2[0] == 2); } memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0] = 0x8008; mpn_rshift (wp, xp, (mp_size_t) 1, 1); ASSERT_ALWAYS (wp[0] == 0x4004); } memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0] = 5; mpn_sqr_basecase (wp, xp, (mp_size_t) 1); ASSERT_ALWAYS (wp[0] == 25); ASSERT_ALWAYS (wp[1] == 0); } memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0] = 999; yp[0] = 666; mpn_sub_n (wp, xp, yp, (mp_size_t) 1); ASSERT_ALWAYS (wp[0] == 333); } memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); for (i = 0; i < 2; i++) { xp[0] = 123; wp[0] = 456; r = mpn_submul_1 (wp, xp, (mp_size_t) 1, CNST_LIMB(2)); ASSERT_ALWAYS (wp[0] == 210); ASSERT_ALWAYS (r == 0); } }
/* Set R to the value held in the limb array {xp, xn}: obtain a writable
   limb area of xn limbs from R, copy the limbs into it, and let
   mpz_limbs_finish fix up the size of R. */
void
mpz_set_n (mpz_t r, const mp_limb_t *xp, mp_size_t xn)
{
  mp_limb_t *dst = mpz_limbs_write (r, xn);

  mpn_copyi (dst, xp, xn);
  mpz_limbs_finish (r, xn);
}
int main(void) { mp_bitcnt_t depth, w; flint_rand_t state; printf("fft/ifft_mfa_truncate_sqrt2...."); fflush(stdout); flint_randinit(state); _flint_rand_init_gmp(state); for (depth = 6; depth <= 13; depth++) { for (w = 1; w <= 5; w++) { mp_size_t n = (1UL<<depth); mp_size_t trunc = 2*n + n_randint(state, 2*n) + 1; mp_size_t n1 = (1UL<<(depth/2)); mp_size_t limbs = (n*w)/GMP_LIMB_BITS; mp_size_t size = limbs + 1; mp_size_t i; mp_limb_t * ptr; mp_limb_t ** ii, ** jj, * t1, * t2, * s1; trunc = 2*n1*((trunc + 2*n1 - 1)/(2*n1)); ii = flint_malloc((4*(n + n*size) + 3*size)*sizeof(mp_limb_t)); for (i = 0, ptr = (mp_limb_t *) ii + 4*n; i < 4*n; i++, ptr += size) { ii[i] = ptr; random_fermat(ii[i], state, limbs); } t1 = ptr; t2 = t1 + size; s1 = t2 + size; for (i = 0; i < 4*n; i++) mpn_normmod_2expp1(ii[i], limbs); jj = flint_malloc(4*(n + n*size)*sizeof(mp_limb_t)); for (i = 0, ptr = (mp_limb_t *) jj + 4*n; i < 4*n; i++, ptr += size) { jj[i] = ptr; mpn_copyi(jj[i], ii[i], size); } fft_mfa_truncate_sqrt2(ii, n, w, &t1, &t2, &s1, n1, trunc); ifft_mfa_truncate_sqrt2(ii, n, w, &t1, &t2, &s1, n1, trunc); for (i = 0; i < trunc; i++) { mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, depth + 2); mpn_normmod_2expp1(ii[i], limbs); } for (i = 0; i < trunc; i++) { if (mpn_cmp(ii[i], jj[i], size) != 0) { printf("FAIL:\n"); printf("n = %ld, trunc = %ld\n", n, trunc); printf("Error in entry %ld\n", i); abort(); } } flint_free(ii); flint_free(jj); } } flint_randclear(state); printf("PASS\n"); return 0; }
/* ecc_add_ehh: r = p + q.

   p, q and r each hold three coordinates of ecc->p.size limbs, laid out
   as x, y, z (see the x1..z3 macros).  scratch is carved into six slots of
   ecc->p.size limbs each (C, D, T, E, A, B); F and G are further names for
   the D and E slots, so those are overwritten once F and G are computed.

   The x3 area of r is used as a temporary for C*D before the inputs' z
   coordinates are read, and z3 is written only at the very end through a
   copy from B.

   NOTE(review): none of the coordinate or slot macros is #undef'ed within
   this block; check that the rest of the file does not rely on those
   names. */
/* Add two points on an Edwards curve, in homogeneous coordinates */ void ecc_add_ehh (const struct ecc_curve *ecc, mp_limb_t *r, const mp_limb_t *p, const mp_limb_t *q, mp_limb_t *scratch) { #define x1 p #define y1 (p + ecc->p.size) #define z1 (p + 2*ecc->p.size) #define x2 q #define y2 (q + ecc->p.size) #define z2 (q + 2*ecc->p.size) #define x3 r #define y3 (r + ecc->p.size) #define z3 (r + 2*ecc->p.size) /* Formulas (from djb, http://www.hyperelliptic.org/EFD/g1p/auto-edwards-projective.html#addition-add-2007-bl): Computation Operation Live variables C = x1*x2 mul C D = y1*y2 mul C, D T = (x1+y1)(x2+y2) - C - D, mul C, D, T E = b*C*D 2 mul C, E, T (Replace C <-- D - C) A = z1*z2 mul A, C, E, T B = A^2 sqr A, B, C, E, T F = B - E A, B, C, E, F, T G = B + E A, C, F, G, T x3 = A*F*T 2 mul A, C, G y3 = A*G*(D-C) 2 mul F, G z3 = F*G mul But when working with the twist curve, we have to negate the factor C = x1*x2. We change subtract to add in the y3 expression, and swap F and G. */ #define C scratch #define D (scratch + ecc->p.size) #define T (scratch + 2*ecc->p.size) #define E (scratch + 3*ecc->p.size) #define A (scratch + 4*ecc->p.size) #define B (scratch + 5*ecc->p.size) #define F D #define G E ecc_modp_mul (ecc, C, x1, x2); ecc_modp_mul (ecc, D, y1, y2); ecc_modp_add (ecc, A, x1, y1); ecc_modp_add (ecc, B, x2, y2); ecc_modp_mul (ecc, T, A, B); ecc_modp_sub (ecc, T, T, C); ecc_modp_sub (ecc, T, T, D); ecc_modp_mul (ecc, x3, C, D); ecc_modp_mul (ecc, E, x3, ecc->b); ecc_modp_add (ecc, C, D, C); /* ! */ ecc_modp_mul (ecc, A, z1, z2); ecc_modp_sqr (ecc, B, A); ecc_modp_sub (ecc, F, B, E); ecc_modp_add (ecc, G, B, E); /* x3 */ ecc_modp_mul (ecc, B, G, T); /* ! */ ecc_modp_mul (ecc, x3, B, A); /* y3 */ ecc_modp_mul (ecc, B, F, C); /* ! */ ecc_modp_mul (ecc, y3, B, A); /* z3 */ ecc_modp_mul (ecc, B, F, G); mpn_copyi (z3, B, ecc->p.size); }
/* Copy len coefficients starting at poly + k to res, i.e. drop the k
   lowest coefficients.  Note that len counts the coefficients written to
   res, so poly must provide at least k + len coefficients. */
void _nmod_poly_shift_right(mp_ptr res, mp_srcptr poly, long len, long k)
{
    mp_srcptr src = poly + k;

    mpn_copyi(res, src, len);
}
int main(void) { mp_bitcnt_t depth, w; flint_rand_t state; printf("fft/ifft_radix2...."); fflush(stdout); flint_randinit(state); _flint_rand_init_gmp(state); for (depth = 6; depth <= 12; depth++) { for (w = 1; w <= 5; w++) { mp_size_t n = (1UL<<depth); mp_size_t limbs = (n*w)/GMP_LIMB_BITS; mp_size_t size = limbs + 1; mp_size_t i; mp_limb_t * ptr; mp_limb_t ** ii, ** jj, *t1, *t2; ii = flint_malloc((2*(n + n*size) + 2*size)*sizeof(mp_limb_t)); for (i = 0, ptr = (mp_limb_t *) ii + 2*n; i < 2*n; i++, ptr += size) { ii[i] = ptr; random_fermat(ii[i], state, limbs); } t1 = ptr; t2 = t1 + size; for (i = 0; i < 2*n; i++) mpn_normmod_2expp1(ii[i], limbs); jj = flint_malloc(2*(n + n*size)*sizeof(mp_limb_t)); for (i = 0, ptr = (mp_limb_t *) jj + 2*n; i < 2*n; i++, ptr += size) { jj[i] = ptr; mpn_copyi(jj[i], ii[i], size); } fft_radix2(ii, n, w, &t1, &t2); ifft_radix2(ii, n, w, &t1, &t2); for (i = 0; i < 2*n; i++) { mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, depth + 1); mpn_normmod_2expp1(ii[i], limbs); } for (i = 0; i < 2*n; i++) { if (mpn_cmp(ii[i], jj[i], size) != 0) { printf("FAIL:\n"); printf("Error in entry %ld\n", i); abort(); } } flint_free(ii); flint_free(jj); } } flint_randclear(state); printf("PASS\n"); return 0; }
/* Set res to the composition of poly1 with poly2, reduced modulo poly3, as
   computed by _nmod_poly_compose_mod_horner.

   Aborts with a "division by zero" message when poly3 is empty.  The
   result is zero when poly1 is empty or poly3 has length 1, and a copy of
   poly1 when poly1 is a constant.  When res is the same object as poly1 or
   poly3 the computation is done into a temporary and swapped in.

   poly2 is first brought to exactly len3 - 1 coefficients in a scratch
   vector: zero-padded if it is short enough, reduced modulo poly3 with
   _nmod_poly_rem otherwise. */
void nmod_poly_compose_mod_horner(nmod_poly_t res, const nmod_poly_t poly1,
                                  const nmod_poly_t poly2, const nmod_poly_t poly3)
{
    const long len1 = poly1->length;
    const long len2 = poly2->length;
    const long len3 = poly3->length;
    const long len = len3 - 1;
    mp_ptr reduced;

    if (len3 == 0)
    {
        printf("exception: division by zero in nmod_poly_compose_mod_horner\n");
        abort();
    }

    if (len1 == 0 || len3 == 1)
    {
        nmod_poly_zero(res);
        return;
    }

    if (len1 == 1)
    {
        nmod_poly_set(res, poly1);
        return;
    }

    if (res == poly3 || res == poly1)
    {
        /* The output would overwrite an operand that is still needed. */
        nmod_poly_t tmp;

        nmod_poly_init_preinv(tmp, res->mod.n, res->mod.ninv);
        nmod_poly_compose_mod_horner(tmp, poly1, poly2, poly3);
        nmod_poly_swap(tmp, res);
        nmod_poly_clear(tmp);
        return;
    }

    reduced = _nmod_vec_init(len);

    if (len2 > len)
    {
        _nmod_poly_rem(reduced, poly2->coeffs, len2,
                       poly3->coeffs, len3, res->mod);
    }
    else
    {
        mpn_copyi(reduced, poly2->coeffs, len2);
        mpn_zero(reduced + len2, len - len2);
    }

    nmod_poly_fit_length(res, len);
    _nmod_poly_compose_mod_horner(res->coeffs, poly1->coeffs, len1,
                                  reduced, poly3->coeffs, len3, res->mod);
    res->length = len;
    _nmod_poly_normalise(res);

    _nmod_vec_clear(reduced);
}