コード例 #1
0
ファイル: ngcd_step.c プロジェクト: bsmr-common-lisp/xcl
/* Multiply M by M1 from the right. Since the M1 elements fit in
   GMP_NUMB_BITS - 1 bits, M grows by at most one limb. Needs
   temporary space M->n */
static void
ngcd_matrix_mul_1 (struct ngcd_matrix *M, const struct ngcd_matrix1 *M1)
{
  unsigned row;
  mp_limb_t grow;
  for (row = 0, grow = 0; row < 2; row++)
    {
      mp_limb_t c0, c1;

      /* Compute (u, u') <-- (r00 u + r10 u', r01 u + r11 u') as

	  t   = u
	  u  *= r00
	  u  += r10 * u'
	  u' *= r11
	  u' += r01 * t
      */

      MPN_COPY (M->tp, M->p[row][0], M->n);
      c0 =     mpn_mul_1 (M->p[row][0], M->p[row][0], M->n, M1->u[0][0]);
      c0 += mpn_addmul_1 (M->p[row][0], M->p[row][1], M->n, M1->u[1][0]);
      M->p[row][0][M->n] = c0;

      c1 =     mpn_mul_1 (M->p[row][1], M->p[row][1], M->n, M1->u[1][1]);
      c1 += mpn_addmul_1 (M->p[row][1], M->tp,        M->n, M1->u[0][1]);
      M->p[row][1][M->n] = c1;

      grow |= (c0 | c1);
    }
  M->n += (grow != 0);
  ASSERT (M->n < M->alloc);
}
コード例 #2
0
ファイル: mulhigh_n.c プロジェクト: HRF92/mpir
/* (rp, 2n) = (xp, n)*(yp, n) / B^n */ 
inline static void
mpn_mulshort_n_basecase(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
  mp_size_t i, k;

#if GMP_NAIL_BITS==0
  mp_limb_t t1, t2, t3;
#endif

  ASSERT(n >= 3);  /* this restriction doesn't make a lot of sense in general */
  ASSERT_MPN(xp, n);
  ASSERT_MPN(yp, n);
  ASSERT(!MPN_OVERLAP_P (rp, 2 * n, xp, n));
  ASSERT(!MPN_OVERLAP_P (rp, 2 * n, yp, n));

  k = n - 2; /* so want short product sum_(i + j >= k) x[i]y[j]B^(i + j) */

#if GMP_NAIL_BITS!=0
  rp[n] = mpn_mul_1(rp + k, xp + k, 2, yp[0]);
#else

  umul_ppmm(t1, rp[k], xp[k], yp[0]);
  umul_ppmm(t3, t2, xp[k + 1], yp[0]);
  add_ssaaaa(rp[n], rp[k + 1], t3, t2, 0, t1);
#endif

  for (i = 1; i <= n - 2; i++)
     rp[n + i] = mpn_addmul_1 (rp + k, xp + k - i, 2 + i, yp[i]);
  
  rp[n + n - 1] = mpn_addmul_1 (rp + n - 1, xp, n, yp[n - 1]);
  
  return;
}
コード例 #3
0
ファイル: relic_bn_sqr_low.c プロジェクト: Arash-Afshar/relic
void bn_sqra_low(dig_t *c, const dig_t *a, int size) {
	dig_t carry, digit = *a;

	carry = mpn_addmul_1(c, a, size, digit);
	mpn_add_1(c + size, c + size, size, carry);
	if (size > 1) {
		carry = mpn_addmul_1(c + 1, a + 1, size - 1, digit);
		mpn_add_1(c + size, c + size, size, carry);
	}
}
コード例 #4
0
ファイル: redc_2.c プロジェクト: RodneyBates/M3Devel
you lose
#endif

/* For testing purposes, define our own mpn_addmul_2 if there is none already
   available.  */
#ifndef HAVE_NATIVE_mpn_addmul_2
mp_limb_t
mpn_addmul_2 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_srcptr vp)
{
  rp[n] = mpn_addmul_1 (rp, up, n, vp[0]);
  return mpn_addmul_1 (rp + 1, up, n, vp[1]);
}
コード例 #5
0
ファイル: ecc-modp.c プロジェクト: Distrotech/nettle
void
ecc_modp_addmul_1 (const struct ecc_curve *ecc, mp_limb_t *rp,
		   const mp_limb_t *ap, mp_limb_t b)
{
  mp_limb_t hi;

  assert (b <= 0xffffffff);
  hi = mpn_addmul_1 (rp, ap, ecc->size, b);
  hi = mpn_addmul_1 (rp, ecc->Bmodp, ecc->size, hi);
  assert (hi <= 1);
  hi = cnd_add_n (hi, rp, ecc->Bmodp, ecc->size);
  /* Sufficient roughly if b < B^size / p */
  assert (hi == 0);
}
コード例 #6
0
ファイル: div_basecase.c プロジェクト: goens/flint2
void
_nmod_poly_div_basecase_2(mp_ptr Q, mp_ptr W,
                             mp_srcptr A, long A_len, mp_srcptr B, long B_len,
                             nmod_t mod)
{
    long coeff, i, len;
    mp_limb_t lead_inv = n_invmod(B[B_len - 1], mod.n);
    mp_ptr B2, R2;
    mp_srcptr Btop;
    
    B2 = W;
    for (i = 0; i < B_len - 1; i++)
    {
        B2[2 * i] = B[i];
        B2[2 * i + 1] = 0;
    }
    Btop = B2 + 2*(B_len - 1);

    R2 = W + 2*(B_len - 1);
    for (i = 0; i < A_len - B_len + 1; i++)
    {
        R2[2 * i] = A[B_len + i - 1];
        R2[2 * i + 1] = 0;
    }

    coeff = A_len - B_len;
    
    while (coeff >= 0)
    {
        mp_limb_t r_coeff;
        r_coeff =
            n_ll_mod_preinv(R2[2 * coeff + 1], R2[2 * coeff], mod.n, mod.ninv);

        while (coeff >= 0 && r_coeff == 0L)
        {
            Q[coeff--] = 0L;
            if (coeff >= 0)
                r_coeff =
                    n_ll_mod_preinv(R2[2 * coeff + 1], R2[2 * coeff], mod.n,
                                    mod.ninv);
        }

        if (coeff >= 0)
        {
            mp_limb_t c, * R_sub;

            Q[coeff] =
                n_mulmod2_preinv(r_coeff, lead_inv, mod.n, mod.ninv);

            c = n_negmod(Q[coeff], mod.n);

            len = FLINT_MIN(B_len - 1, coeff);
            R_sub = R2 + 2 * (coeff - len);
            if (len > 0)
                mpn_addmul_1(R_sub, Btop - 2*len, 2 * len, c);

            coeff--;
        }
    }
}
コード例 #7
0
ファイル: mul_lo.c プロジェクト: CplusHua/yafu-setup-package
/* puts in {rp, n} the low part of {np, n} times {mp, n}, i.e. equivalent to:

   mp_ptr tp;
   TMP_DECL(marker);
   TMP_MARK(marker);
   tp = TMP_ALLOC_LIMBS (2 * n);
   mpn_mul_n (tp, np, mp, n);
   MPN_COPY (rp, tp, n);
   TMP_FREE(marker);
 */
void
ecm_mul_lo_basecase (mp_ptr rp, mp_srcptr np, mp_srcptr mp, mp_size_t n)
{
  mpn_mul_1 (rp, np, n, mp[0]);
  for (; --n;)
    mpn_addmul_1 (++rp, np, n, (++mp)[0]);
}
コード例 #8
0
ファイル: divrem_basecase.c プロジェクト: clear731/lattice
void
_nmod_poly_divrem_basecase_1(mp_ptr Q, mp_ptr R, mp_ptr W,
                             mp_srcptr A, slong lenA, mp_srcptr B, slong lenB,
                             nmod_t mod)
{
    const mp_limb_t invL = n_invmod(B[lenB - 1], mod.n);
    slong iR;
    mp_ptr ptrQ = Q - lenB + 1;
    mp_ptr R1 = W;
    
    flint_mpn_copyi(R1, A, lenA);

    for (iR = lenA - 1; iR >= lenB - 1; iR--)
    {
        if (R1[iR] == 0)
        {
            ptrQ[iR] = WORD(0);
        }
        else 
        {
            ptrQ[iR] = n_mulmod2_preinv(R1[iR], invL, mod.n, mod.ninv);

            if (lenB > 1)
            {
                const mp_limb_t c = n_negmod(ptrQ[iR], mod.n);
                mpn_addmul_1(R1 + iR - lenB + 1, B, lenB - 1, c);
            }
        }
    }

    if (lenB > 1)
        _nmod_vec_reduce(R, R1, lenB - 1, mod);
}
コード例 #9
0
ファイル: relic_fp_rdc_low.c プロジェクト: ekr/hacrypto
void fp_rdcn_low(dig_t *c, dig_t *a) {
	int i;
	dig_t r, c0, c1, u, *tmp;
	const dig_t *m;

	u = *(fp_prime_get_rdc());
	m = fp_prime_get();

	tmp = a;

	c1 = 0;
	for (i = 0; i < FP_DIGS; i++, tmp++) {
		r = (dig_t)(*tmp * u);
		c0 = mpn_addmul_1(tmp, m, FP_DIGS, r);
		c1 += mpn_add_1(tmp + FP_DIGS, tmp + FP_DIGS, FP_DIGS - i, c0);
	}
	for (i = 0; i < FP_DIGS; i++, tmp++) {
		c[i] = *tmp;
	}
	for (i = 0; i < c1; i++) {
		fp_subn_low(c, c, m);
	}
	if (fp_cmp(c, m) != CMP_LT) {
		fp_subn_low(c, c, m);
	}
}
コード例 #10
0
void
mpn_mullo_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  mp_limb_t h;

  h = up[0] * vp[n - 1];

  if (n != 1)
    {
      mp_size_t i;
      mp_limb_t v0;

      v0 = *vp++;
      h += up[n - 1] * v0 + mpn_mul_1 (rp, up, n - 1, v0);
      rp++;

      for (i = n - 2; i > 0; i--)
	{
	  v0 = *vp++;
	  h += up[i] * v0 + mpn_addmul_1 (rp, up, i, v0);
	  rp++;
	}
    }

  rp[0] = h;
}
コード例 #11
0
ファイル: redc_2.c プロジェクト: RodneyBates/M3Devel
void
mpn_redc_2 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr mip)
{
  mp_limb_t q[2];
  mp_size_t j;
  mp_limb_t upn;
  mp_limb_t cy;

  ASSERT_MPN (up, 2*n);

  if ((n & 1) != 0)
    {
      up[0] = mpn_addmul_1 (up, mp, n, (up[0] * mip[0]) & GMP_NUMB_MASK);
      up++;
    }

  for (j = n - 2; j >= 0; j -= 2)
    {
      umul2low (q[1], q[0], mip[1], mip[0], up[1], up[0]);
      upn = up[n];		/* mpn_addmul_2 overwrites this */
      up[1] = mpn_addmul_2 (up, mp, n, q);
      up[0] = up[n];
      up[n] = upn;
      up += 2;
    }
  cy = mpn_add_n (rp, up, up - n, n);
  if (cy != 0)
    mpn_sub_n (rp, rp, mp, n);
}
コード例 #12
0
ファイル: rem_basecase.c プロジェクト: goens/flint2
void _nmod_poly_rem_basecase_1(mp_ptr R, mp_ptr W,
                               mp_srcptr A, long lenA, mp_srcptr B, long lenB,
                               nmod_t mod)
{
    if (lenB > 1)
    {
        const mp_limb_t invL = n_invmod(B[lenB - 1], mod.n);
        long iR;
        mp_ptr R1 = W;

        mpn_copyi(R1, A, lenA);

        for (iR = lenA - 1; iR >= lenB - 1; iR--)
        {
            if (R1[iR] != 0)
            {
                const mp_limb_t q = n_mulmod2_preinv(R1[iR], invL, mod.n, mod.ninv);
                const mp_limb_t c = n_negmod(q, mod.n);

                mpn_addmul_1(R1 + iR - lenB + 1, B, lenB - 1, c);
            }
        }
        _nmod_vec_reduce(R, R1, lenB - 1, mod);
    }
}
コード例 #13
0
ファイル: mullow_classical.c プロジェクト: clear731/lattice
/* Assumes poly1 and poly2 are not length 0 and 0 < trunc <= len1 + len2 - 1 */
void
_nmod_poly_mullow_classical(mp_ptr res, mp_srcptr poly1, slong len1,
                            mp_srcptr poly2, slong len2, slong trunc, nmod_t mod)
{
    if (len1 == 1 || trunc == 1)    /* Special case if the length of output is 1 */
    {
        res[0] = n_mulmod2_preinv(poly1[0], poly2[0], mod.n, mod.ninv);
    }
    else                        /* Ordinary case */
    {
        slong i;

        slong bits = FLINT_BITS - (slong) mod.norm;
        slong log_len = FLINT_BIT_COUNT(len2);

        if (2 * bits + log_len <= FLINT_BITS)
        {
            /* Set res[i] = poly1[i]*poly2[0] */
            mpn_mul_1(res, poly1, FLINT_MIN(len1, trunc), poly2[0]);

            if (len2 != 1)
            {
                /* Set res[i+len1-1] = in1[len1-1]*in2[i] */
                if (trunc > len1)
                    mpn_mul_1(res + len1, poly2 + 1, trunc - len1,
                              poly1[len1 - 1]);

                /* out[i+j] += in1[i]*in2[j] */
                for (i = 0; i < FLINT_MIN(len1, trunc) - 1; i++)
                    mpn_addmul_1(res + i + 1, poly2 + 1,
                                 FLINT_MIN(len2, trunc - i) - 1, poly1[i]);
            }

            _nmod_vec_reduce(res, res, trunc, mod);
        }
        else
        {
            /* Set res[i] = poly1[i]*poly2[0] */
            _nmod_vec_scalar_mul_nmod(res, poly1, FLINT_MIN(len1, trunc),
                                 poly2[0], mod);

            if (len2 == 1)
                return;

            /* Set res[i+len1-1] = in1[len1-1]*in2[i] */
            if (trunc > len1)
                _nmod_vec_scalar_mul_nmod(res + len1, poly2 + 1, trunc - len1,
                                     poly1[len1 - 1], mod);

            /* out[i+j] += in1[i]*in2[j] */
            for (i = 0; i < FLINT_MIN(len1, trunc) - 1; i++)
                _nmod_vec_scalar_addmul_nmod(res + i + 1, poly2 + 1,
                                        FLINT_MIN(len2, trunc - i) - 1, 
                                        poly1[i], mod);
        }
    }
}
コード例 #14
0
ファイル: mullow_basecase.c プロジェクト: angavrilov/ecl-sse
void
mpn_mullow_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  mp_size_t i;

  mpn_mul_1 (rp, up, n, vp[0]);

  for (i = 1; i < n; i++)
    mpn_addmul_1 (rp + i, up, n - i, vp[i]);
}
コード例 #15
0
ファイル: mulders.c プロジェクト: SESA/EbbRT-mpfr
/* Put in  rp[0..n] the n+1 low limbs of {up, n} * {vp, n}.
   Assume 2n limbs are allocated at rp. */
static void
mpfr_mullow_n_basecase (mpfr_limb_ptr rp, mpfr_limb_srcptr up,
                        mpfr_limb_srcptr vp, mp_size_t n)
{
  mp_size_t i;

  rp[n] = mpn_mul_1 (rp, up, n, vp[0]);
  for (i = 1 ; i < n ; i++)
    mpn_addmul_1 (rp + i, up, n - i + 1, vp[i]);
}
コード例 #16
0
static mp_limb_t
DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
{
#if USE_MUL_1 && 0
  return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
#else
  mp_limb_t __cy;
  __cy = mpn_lshift(ws,src,n,s);
  return    __cy + mpn_add_n(dst,dst,ws,n);
#endif
}
コード例 #17
0
void
mpn_sbpi1_bdiv_q (mp_ptr qp,
		  mp_ptr np, mp_size_t nn,
		  mp_srcptr dp, mp_size_t dn,
		  mp_limb_t dinv)
{
  mp_size_t i;
  mp_limb_t cy, q;

  ASSERT (dn > 0);
  ASSERT (nn >= dn);
  ASSERT ((dp[0] & 1) != 0);
  /* FIXME: Add ASSERTs for allowable overlapping; i.e., that qp = np is OK,
     but some over N/Q overlaps will not work.  */

  for (i = nn - dn; i > 0; i--)
    {
      q = dinv * np[0];
      cy = mpn_addmul_1 (np, dp, dn, q);
      mpn_add_1 (np + dn, np + dn, i, cy);
      ASSERT (np[0] == 0);
      qp[0] = ~q;
      qp++;
      np++;
    }

  for (i = dn; i > 1; i--)
    {
      q = dinv * np[0];
      mpn_addmul_1 (np, dp, i, q);
      ASSERT (np[0] == 0);
      qp[0] = ~q;
      qp++;
      np++;
    }

  /* Final limb */
  q = dinv * np[0];
  qp[0] = ~q;
  mpn_add_1 (qp - nn + 1, qp - nn + 1, nn, 1);
}
コード例 #18
0
ファイル: divrem_basecase.c プロジェクト: clear731/lattice
void
_nmod_poly_divrem_basecase_3(mp_ptr Q, mp_ptr R, mp_ptr W,
                             mp_srcptr A, slong lenA, mp_srcptr B, slong lenB,
                             nmod_t mod)
{
    const mp_limb_t invL = n_invmod(B[lenB - 1], mod.n);
    slong iR, i;
    mp_ptr B3 = W, R3 = W + 3*(lenB - 1), ptrQ = Q - lenB + 1;

    for (i = 0; i < lenB - 1; i++)
    {
        B3[3 * i] = B[i];
        B3[3 * i + 1] = 0;
        B3[3 * i + 2] = 0;
    }
    for (i = 0; i < lenA; i++)
    {
        R3[3 * i] = A[i];
        R3[3 * i + 1] = 0;
        R3[3 * i + 2] = 0;
    }

    for (iR = lenA - 1; iR >= lenB - 1; )
    {
        mp_limb_t r =
            n_lll_mod_preinv(R3[3 * iR + 2], R3[3 * iR + 1],
                             R3[3 * iR], mod.n, mod.ninv);

        while ((iR + 1 >= lenB) && (r == WORD(0)))
        {
            ptrQ[iR--] = WORD(0);
            if (iR + 1 >= lenB)
                r = n_lll_mod_preinv(R3[3 * iR + 2], R3[3 * iR + 1],
                                     R3[3 * iR], mod.n, mod.ninv);
        }

        if (iR + 1 >= lenB)
        {
            ptrQ[iR] = n_mulmod2_preinv(r, invL, mod.n, mod.ninv);

            if (lenB > 1)
            {
                const mp_limb_t c = n_negmod(ptrQ[iR], mod.n);
                mpn_addmul_1(R3 + 3 * (iR - lenB + 1), B3, 3 * lenB - 3, c);
            }
            iR--;
        }
    }

    for (iR = 0; iR < lenB - 1; iR++)
        R[iR] = n_lll_mod_preinv(R3[3 * iR + 2], R3[3 * iR + 1],
                                 R3[3 * iR], mod.n, mod.ninv);
}
コード例 #19
0
ファイル: sbpi1_bdiv_q.c プロジェクト: 119/aircam-openwrt
void
mpn_sbpi1_bdiv_q (mp_ptr qp,
		  mp_ptr np, mp_size_t nn,
		  mp_srcptr dp, mp_size_t dn,
		  mp_limb_t dinv)
{
  mp_size_t i;
  mp_limb_t cy, q;

  ASSERT (dn > 0);
  ASSERT (nn >= dn);
  ASSERT ((dp[0] & 1) != 0);

  for (i = nn - dn; i > 0; i--)
    {
      q = dinv * np[0];
      qp[0] = ~q;
      qp++;
      cy = mpn_addmul_1 (np, dp, dn, q);
      mpn_add_1 (np + dn, np + dn, i, cy);
      ASSERT (np[0] == 0);
      np++;
    }

  for (i = dn; i > 1; i--)
    {
      q = dinv * np[0];
      qp[0] = ~q;
      qp++;
      mpn_addmul_1 (np, dp, i, q);
      ASSERT (np[0] == 0);
      np++;
    }

  /* Final limb */
  q = dinv * np[0];
  qp[0] = ~q;
  mpn_add_1 (qp - nn + 1, qp - nn + 1, nn, 1);
}
コード例 #20
0
ファイル: mulders.c プロジェクト: SESA/EbbRT-mpfr
/* Put in  rp[n..2n-1] an approximation of the n high limbs
   of {up, n} * {vp, n}. The error is less than n ulps of rp[n] (and the
   approximation is always less or equal to the truncated full product).
   Assume 2n limbs are allocated at rp.

   Implements Algorithm ShortMulNaive from [1].
*/
static void
mpfr_mulhigh_n_basecase (mpfr_limb_ptr rp, mpfr_limb_srcptr up,
                         mpfr_limb_srcptr vp, mp_size_t n)
{
  mp_size_t i;

  rp += n - 1;
  umul_ppmm (rp[1], rp[0], up[n-1], vp[0]); /* we neglect up[0..n-2]*vp[0],
                                               which is less than B^n */
  for (i = 1 ; i < n ; i++)
    /* here, we neglect up[0..n-i-2] * vp[i], which is less than B^n too */
    rp[i + 1] = mpn_addmul_1 (rp, up + (n - i - 1), i + 1, vp[i]);
  /* in total, we neglect less than n*B^n, i.e., n ulps of rp[n]. */
}
コード例 #21
0
void
mpn_mullo_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
  mp_size_t i;

  mpn_mul_1 (rp, up, n, vp[0]);

  for (i = n - 1; i > 0; i--)
    {
      vp++;
      rp++;
      mpn_addmul_1 (rp, up, i, vp[0]);
    }
}
コード例 #22
0
ファイル: montfp.cpp プロジェクト: mahdiz/mpclib
// Montgomery multiplication.
// See Blake, Seroussi and Smart.
static inline void mont_mul(mp_limb_t *c, mp_limb_t *a, mp_limb_t *b,
	fptr p) {
	// Instead of right shifting every iteration
	// I allocate more room for the z array.
	size_t i, t = p->limbs;

#ifdef _MSC_VER		// for VC++ compatibility
	mp_limb_t z[2 * MAX_LIMBS + 1];
#else
	mp_limb_t z[2 * t + 1];
#endif

	mp_limb_t u = (a[0] * b[0]) * p->negpinv;
	mp_limb_t v = z[t] = mpn_mul_1(z, b, t, a[0]);
	z[t] += mpn_addmul_1(z, p->primelimbs, t, u);
	z[t + 1] = z[t] < v;  // Handle overflow.
	for (i = 1; i < t; i++) {
		u = (z[i] + a[i] * b[0]) * p->negpinv;
		v = z[t + i] += mpn_addmul_1(z + i, b, t, a[i]);
		z[t + i] += mpn_addmul_1(z + i, p->primelimbs, t, u);
		z[t + i + 1] = z[t + i] < v;
	}
	if (z[t * 2] || mpn_cmp(z + t, p->primelimbs, t) >= 0) {
		mpn_sub_n(c, z + t, p->primelimbs, t);
	}
	else {
		memcpy(c, z + t, t * sizeof(mp_limb_t));
		// Doesn't seem to make a difference:
		/*
		   mpz_t z1, z2;
		   z1->_mp_d = c;
		   z2->_mp_d = z + t;
		   z1->_mp_size = z1->_mp_alloc = z2->_mp_size = z2->_mp_alloc = t;
		   mpz_set(z1, z2);
		   */
	}
}
コード例 #23
0
ファイル: powm_sec.c プロジェクト: AhmadTux/DragonFlyBSD
/* Define our own squaring function, which uses mpn_sqr_basecase for its
   allowed sizes, but its own code for larger sizes.  */
static void
mpn_local_sqr (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr tp)
{
  mp_size_t i;

  ASSERT (n >= 1);
  ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));

  if (BELOW_THRESHOLD (n, SQR_BASECASE_LIM))
    {
      mpn_sqr_basecase (rp, up, n);
      return;
    }

  {
    mp_limb_t ul, lpl;
    ul = up[0];
    umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
    rp[0] = lpl >> GMP_NAIL_BITS;
  }
  if (n > 1)
    {
      mp_limb_t cy;

      cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
      tp[n - 1] = cy;
      for (i = 2; i < n; i++)
	{
	  mp_limb_t cy;
	  cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
	  tp[n + i - 2] = cy;
	}
      MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);

      {
	mp_limb_t cy;
#if HAVE_NATIVE_mpn_addlsh1_n
	cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
#else
	cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
	cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
#endif
	rp[2 * n - 1] += cy;
      }
    }
}
コード例 #24
0
ファイル: montfp.cpp プロジェクト: mahdiz/mpclib
// Montgomery reduction.
// Algorithm II.4 from Blake, Seroussi and Smart.
static void mont_reduce(mp_limb_t *x, mp_limb_t *y, fptr p) {
	size_t t = p->limbs;
	size_t i;
	mp_limb_t flag = 0;
	for (i = 0; i < t; i++) {
		mp_limb_t u = y[i] * p->negpinv;
		mp_limb_t carry = mpn_addmul_1(&y[i], p->primelimbs, t, u);
		//mpn_add_1(&y[i+t], &y[i+t], t - i + 1, carry);
		flag += mpn_add_1(&y[i + t], &y[i + t], t - i, carry);
	}
	if (flag || mpn_cmp(&y[t], p->primelimbs, t) >= 0) {
		mpn_sub_n(x, &y[t], p->primelimbs, t);
	}
	else {
		// TODO: GMP set might be faster.
		memcpy(x, &y[t], t * sizeof(mp_limb_t));
	}
}
コード例 #25
0
ファイル: redc_1.c プロジェクト: Masuzu/RumourPropagation
/* Set cp[] <- tp[]/R^n mod mp[].  Clobber tp[].
   mp[] is n limbs; tp[] is 2n limbs.  */
void mpn_redc_1 (mp_ptr cp, mp_ptr tp, mp_srcptr mp, mp_size_t n, mp_limb_t Nprim)
{
  mp_limb_t cy;
  mp_limb_t q;
  mp_size_t j;

  ASSERT_MPN (tp, 2*n);

  for (j = 0; j < n; j++)
    {
      q = (tp[0] * Nprim) & GMP_NUMB_MASK;
      tp[0] = mpn_addmul_1 (tp, mp, n, q);
      tp++;
    }
  cy = mpn_add_n (cp, tp, tp - n, n);
  if (cy != 0)
    mpn_sub_n (cp, cp, mp, n);
}
コード例 #26
0
ファイル: redc_1_sec.c プロジェクト: 119/aircam-openwrt
void
mpn_redc_1_sec (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
{
  mp_size_t j;
  mp_limb_t cy;

  ASSERT (n > 0);
  ASSERT_MPN (up, 2*n);

  for (j = n - 1; j >= 0; j--)
    {
      cy = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
      ASSERT (up[0] == 0);
      up[0] = cy;
      up++;
    }
  cy = mpn_add_n (rp, up, up - n, n);
  mpn_subcnd_n (rp, rp, mp, n, cy);
}
コード例 #27
0
ファイル: mullow_n.c プロジェクト: angavrilov/ecl-sse
void
mpn_mullow_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
  if (BELOW_THRESHOLD (n, MULLOW_BASECASE_THRESHOLD))
    {
      /* Allocate workspace of fixed size on stack: fast! */
      mp_limb_t ws[MUL_BASECASE_ALLOC];
      mpn_mul_basecase (ws, xp, n, yp, n);
      MPN_COPY (rp, ws, n);
    }
  else if (BELOW_THRESHOLD (n, MULLOW_DC_THRESHOLD))
    {
      mpn_mullow_basecase (rp, xp, yp, n);
    }
  else if (BELOW_THRESHOLD (n, MULLOW_MUL_N_THRESHOLD))
    {
      /* Divide-and-conquer */
      mp_size_t n2 = n >> 1;		/* floor(n/2) */
      mp_size_t n1 = n - n2;		/* ceil(n/2) */
      mp_ptr tp;
      TMP_SDECL;
      TMP_SMARK;
      tp = TMP_SALLOC_LIMBS (n1);

      /* Split as x = x1 2^(n1 GMP_NUMB_BITS) + x0,
                  y = y1 2^(n2 GMP_NUMB_BITS) + y0 */

      /* x0 * y0 */
      mpn_mul_n (rp, xp, yp, n2);
      if (n1 != n2)
	rp[2 * n2] = mpn_addmul_1 (rp + n2, yp, n2, xp[n2]);

      /* x1 * y0 * 2^(n1 GMP_NUMB_BITS) */
      mpn_mullow_n (tp, xp + n1, yp, n2);
      mpn_add_n (rp + n1, rp + n1, tp, n2);

      /* x0 * y1 * 2^(n2 GMP_NUMB_BITS) */
      mpn_mullow_n (tp, yp + n2, xp, n1);
      mpn_add_n (rp + n2, rp + n2, tp, n1);
      TMP_SFREE;
    }
  else
    {
コード例 #28
0
ファイル: mul_n.c プロジェクト: eva-oss/glibc-linaro
void
impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
{
  mp_size_t i;
  mp_limb_t cy_limb;
  mp_limb_t v_limb;

  /* Multiply by the first limb in V separately, as the result can be
     stored (not added) to PROD.  We also avoid a loop for zeroing.  */
  v_limb = vp[0];
  if (v_limb <= 1)
    {
      if (v_limb == 1)
	MPN_COPY (prodp, up, size);
      else
	MPN_ZERO (prodp, size);
      cy_limb = 0;
    }
  else
    cy_limb = mpn_mul_1 (prodp, up, size, v_limb);

  prodp[size] = cy_limb;
  prodp++;

  /* For each iteration in the outer loop, multiply one limb from
     U with one limb from V, and add it to PROD.  */
  for (i = 1; i < size; i++)
    {
      v_limb = vp[i];
      if (v_limb <= 1)
	{
	  cy_limb = 0;
	  if (v_limb == 1)
	    cy_limb = mpn_add_n (prodp, prodp, up, size);
	}
      else
	cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);

      prodp[size] = cy_limb;
      prodp++;
    }
}
コード例 #29
0
ファイル: div_basecase.c プロジェクト: goens/flint2
void
_nmod_poly_div_basecase_1(mp_ptr Q, mp_ptr W,
                             mp_srcptr A, long A_len, mp_srcptr B, long B_len,
                             nmod_t mod)
{
    mp_limb_t lead_inv = n_invmod(B[B_len - 1], mod.n);
    long len, coeff = A_len - B_len;
    
    mp_ptr R1 = W;
    mp_srcptr Btop = B + B_len - 1;
    
    mpn_copyi(R1, A + B_len - 1, A_len - B_len + 1);

    while (coeff >= 0)
    {
        R1[coeff] = n_mod2_preinv(R1[coeff], mod.n, mod.ninv);

        while (coeff >= 0 && R1[coeff] == 0L)
        {
            Q[coeff--] = 0L;
            if (coeff >= 0)
                R1[coeff] = n_mod2_preinv(R1[coeff], mod.n, mod.ninv);
        }

        if (coeff >= 0)
        {
            mp_limb_t c, * R_sub;

            Q[coeff] =
                n_mulmod2_preinv(R1[coeff], lead_inv, mod.n, mod.ninv);

            c = n_negmod(Q[coeff], mod.n);

            len = FLINT_MIN(B_len - 1, coeff);
            R_sub = R1 + coeff - len;
            if (len > 0)
                mpn_addmul_1(R_sub, Btop - len, len, c);

            coeff--;
        }
    }
}
コード例 #30
0
ファイル: rem_basecase.c プロジェクト: goens/flint2
void _nmod_poly_rem_basecase_3(mp_ptr R, mp_ptr W,
                               mp_srcptr A, long lenA, mp_srcptr B, long lenB,
                               nmod_t mod)
{
    if (lenB > 1)
    {
        const mp_limb_t invL = n_invmod(B[lenB - 1], mod.n);
        long iR, i;
        mp_ptr B3 = W, R3 = W + 3*(lenB - 1);

        for (i = 0; i < lenB - 1; i++)
        {
            B3[3 * i] = B[i];
            B3[3 * i + 1] = 0;
            B3[3 * i + 2] = 0;
        }
        for (i = 0; i < lenA; i++)
        {
            R3[3 * i] = A[i];
            R3[3 * i + 1] = 0;
            R3[3 * i + 2] = 0;
        }

        for (iR = lenA - 1; iR >= lenB - 1; iR--)
        {
            const mp_limb_t r = n_lll_mod_preinv(R3[3*iR + 2], R3[3*iR + 1],
                                                 R3[3*iR], mod.n, mod.ninv);

            if (r != 0)
            {
                const mp_limb_t q = n_mulmod2_preinv(r, invL, mod.n, mod.ninv);
                const mp_limb_t c = n_negmod(q, mod.n);
                mpn_addmul_1(R3 + 3 * (iR - lenB + 1), B3, 3 * lenB - 3, c);
            }
        }

        for (iR = 0; iR < lenB - 1; iR++)
            R[iR] = n_lll_mod_preinv(R3[3 * iR + 2], R3[3 * iR + 1],
                                     R3[3 * iR], mod.n, mod.ninv);
    }
}