コード例 #1
0
ファイル: mulders.c プロジェクト: Distrotech/mpfr
/* Put in  rp[n..2n-1] an approximation of the n high limbs
   of {np, n}^2. The error is less than n ulps of rp[n]. */
void
mpfr_sqrhigh_n (mpfr_limb_ptr rp, mpfr_limb_srcptr np, mp_size_t n)
{
  mp_size_t k;

  MPFR_STAT_STATIC_ASSERT (MPFR_SQRHIGH_TAB_SIZE > 2); /* ensures k < n */
  k = MPFR_LIKELY (n < MPFR_SQRHIGH_TAB_SIZE) ? sqrhigh_ktab[n]
    : (n+4)/2; /* ensures that k >= (n+3)/2 */
  MPFR_ASSERTD (k == -1 || k == 0 || (k >= (n+4)/2 && k < n));
  if (k < 0)
    /* we can't use mpn_sqr_basecase here, since it requires
       n <= SQR_KARATSUBA_THRESHOLD, where SQR_KARATSUBA_THRESHOLD
       is not exported by GMP */
    mpn_sqr_n (rp, np, n);
  else if (k == 0)
    mpfr_mulhigh_n_basecase (rp, np, np, n);
  else
    {
      mp_size_t l = n - k;
      mp_limb_t cy;

      mpn_sqr_n (rp + 2 * l, np + l, k);          /* fills rp[2l..2n-1] */
      mpfr_mulhigh_n (rp, np, np + k, l);         /* fills rp[l-1..2l-1] */
      /* {rp+n-1,l+1} += 2 * {rp+l-1,l+1} */
      cy = mpn_lshift (rp + l - 1, rp + l - 1, l + 1, 1);
      cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
      mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */
    }
}
コード例 #2
0
int
main (int argc, char **argv)
{
  mp_ptr r1p, r2p, s1p, s2p;
  double t;
  mp_size_t n;

  n = strtol (argv[1], 0, 0);

  r1p = malloc (n * BYTES_PER_MP_LIMB);
  r2p = malloc (n * BYTES_PER_MP_LIMB);
  s1p = malloc (n * BYTES_PER_MP_LIMB);
  s2p = malloc (n * BYTES_PER_MP_LIMB);
  TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r1p,s1p,s2p,n)));
  printf ("              separate add and sub: %.3f\n", t);
  TIME (t,mpn_addsub_n(r1p,r2p,s1p,s2p,n));
  printf ("combined addsub separate variables: %.3f\n", t);
  TIME (t,mpn_addsub_n(r1p,r2p,r1p,s2p,n));
  printf ("        combined addsub r1 overlap: %.3f\n", t);
  TIME (t,mpn_addsub_n(r1p,r2p,r1p,s2p,n));
  printf ("        combined addsub r2 overlap: %.3f\n", t);
  TIME (t,mpn_addsub_n(r1p,r2p,r1p,r2p,n));
  printf ("          combined addsub in-place: %.3f\n", t);

  return 0;
}
コード例 #3
0
ファイル: division.c プロジェクト: wbhart/division
mp_limb_t div_preinv1(mp_limb_t d1, mp_limb_t d2)
{
   mp_limb_t q, r[2], p[2], cy;
   
   if (d2 + 1 == 0 && d1 + 1 == 0)
      return 0;

   if (d1 + 1 == 0)
      q = ~d1, r[1] = ~d2;
   else
      udiv_qrnnd(q, r[1], ~d1, ~d2, d1 + 1);

   r[0] = 0;

   if (d2 + 1 == 0)
      add_ssaaaa(cy, r[1], 0, r[1], 0, q);   
   else
   {
      umul_ppmm(p[1], p[0], q, ~d2 - 1);
      cy = mpn_add_n(r, r, p, 2);
   }
 
   p[0] = d2 + 1, p[1] = d1 + (d2 + 1 == 0);
   if (cy || mpn_cmp(r, p, 2) >= 0)
      q++;
   
   return q;
}
コード例 #4
0
void mpir_fft_trunc1_twiddle(mp_ptr * ii, mp_size_t is,
      mp_size_t n, mp_bitcnt_t w, mp_ptr * t1, mp_ptr * t2,
      mp_size_t ws, mp_size_t r, mp_size_t c, mp_size_t rs, mp_size_t trunc)
{
   mp_size_t i;
   mp_size_t limbs = (w*n)/GMP_LIMB_BITS;
   
   if (trunc == 2*n)
      mpir_fft_radix2_twiddle(ii, is, n, w, t1, t2, ws, r, c, rs);
   else if (trunc <= n)
   {
      for (i = 0; i < n; i++)
         mpn_add_n(ii[i*is], ii[i*is], ii[(i+n)*is], limbs + 1);
      
      mpir_fft_trunc1_twiddle(ii, is, n/2, 2*w, t1, t2, ws, r, c, 2*rs, trunc);
   } else
   {
      for (i = 0; i < n; i++) 
      {   
         mpir_fft_butterfly(*t1, *t2, ii[i*is], ii[(n+i)*is], i, limbs, w);
   
         MP_PTR_SWAP(ii[i*is],     *t1);
         MP_PTR_SWAP(ii[(n+i)*is], *t2);
      }

      mpir_fft_radix2_twiddle(ii, is, n/2, 2*w, t1, t2, ws, r, c, 2*rs);  
      mpir_fft_trunc1_twiddle(ii + n*is, is, n/2, 2*w, 
                                     t1, t2, ws, r + rs, c, 2*rs, trunc - n);
   }
}
コード例 #5
0
ファイル: redc_2.c プロジェクト: RodneyBates/M3Devel
void
mpn_redc_2 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr mip)
{
  mp_limb_t q[2];
  mp_size_t j;
  mp_limb_t upn;
  mp_limb_t cy;

  ASSERT_MPN (up, 2*n);

  if ((n & 1) != 0)
    {
      up[0] = mpn_addmul_1 (up, mp, n, (up[0] * mip[0]) & GMP_NUMB_MASK);
      up++;
    }

  for (j = n - 2; j >= 0; j -= 2)
    {
      umul2low (q[1], q[0], mip[1], mip[0], up[1], up[0]);
      upn = up[n];		/* mpn_addmul_2 overwrites this */
      up[1] = mpn_addmul_2 (up, mp, n, q);
      up[0] = up[n];
      up[n] = upn;
      up += 2;
    }
  cy = mpn_add_n (rp, up, up - n, n);
  if (cy != 0)
    mpn_sub_n (rp, rp, mp, n);
}
コード例 #6
0
ファイル: atest-exp2.c プロジェクト: jnaulet/glibc
/* Compute e^x.  */
static void
exp_mpn (mp1 ex, mp1 x)
{
   unsigned int n;
   mp1 xp;
   mp2 tmp;
   mp_limb_t chk;
   mp1 tol;

   memset (xp, 0, sizeof (mp1));
   memset (ex, 0, sizeof (mp1));
   xp[FRAC / mpbpl] = (mp_limb_t)1 << FRAC % mpbpl;
   memset (tol, 0, sizeof (mp1));
   tol[(FRAC - TOL) / mpbpl] = (mp_limb_t)1 << (FRAC - TOL) % mpbpl;

   n = 0;

   do
     {
       /* Calculate sum(x^n/n!) until the next term is sufficiently small.  */

       mpn_mul_n (tmp, xp, x, SZ);
       assert(tmp[SZ * 2 - 1] == 0);
       if (n > 0)
	 mpn_divmod_1 (xp, tmp + FRAC / mpbpl, SZ, n);
       chk = mpn_add_n (ex, ex, xp, SZ);
       assert (chk == 0);
       ++n;
       assert (n < 80); /* Catch too-high TOL.  */
     }
   while (n < 10 || mpn_cmp (xp, tol, SZ) >= 0);
}
コード例 #7
0
ファイル: invert.c プロジェクト: Macaulay2/mpir
void mpn_invert_trunc(mp_ptr x_new, mp_size_t m, mp_srcptr xp, mp_size_t n, mp_srcptr ap)
{
  mp_ptr tp;
  mp_limb_t cy;
  TMP_DECL;

  TMP_MARK;
  tp = TMP_ALLOC_LIMBS (2 * m);
  
  MPN_COPY(x_new, xp + n - m, m);
  ap += (n - m);

  mpn_mul_n (tp, x_new, ap, m);
  mpn_add_n (tp + m, tp + m, ap, m); /* A * msb(X) */
  
  /* now check B^(2n) - X*A <= A */
  mpn_not (tp, 2 * m);
  mpn_add_1 (tp, tp, 2 * m, 1); /* B^(2m) - X*A */
  
  while (tp[m] || mpn_cmp (tp, ap, m) > 0)
  {
     mpn_add_1(x_new, x_new, m, 1);
     tp[m] -= mpn_sub_n(tp, tp, ap, m);
  }
  TMP_FREE;
}
コード例 #8
0
ファイル: t-invert.c プロジェクト: BrianGladman/mpir
int
test_invert (mp_ptr xp, mp_srcptr ap, mp_size_t n)
{
  int res = 1;
  mp_size_t i;
  mp_ptr tp, up;
  mp_limb_t cy;
  TMP_DECL;

  TMP_MARK;
  tp = TMP_ALLOC_LIMBS (2 * n);
  up = TMP_ALLOC_LIMBS (2 * n);

  /* first check X*A < B^(2*n) */
  mpn_mul_n (tp, xp, ap, n);
  cy = mpn_add_n (tp + n, tp + n, ap, n); /* A * msb(X) */
  if (cy != 0)
    return 0;

  /* now check B^(2n) - X*A <= A */
  mpn_com_n (tp, tp, 2 * n);
  mpn_add_1 (tp, tp, 2 * n, 1); /* B^(2n) - X*A */
  MPN_ZERO (up, 2 * n);
  MPN_COPY (up, ap, n);
  res = mpn_cmp (tp, up, 2 * n) <= 0;
  TMP_FREE;
  return res;
}
コード例 #9
0
ファイル: dc_divrem_n.c プロジェクト: STAR111/GCC_parser
static mp_limb_t
mpn_dc_div_2_by_1 (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
{
  mp_limb_t qhl, cc;
  mp_size_t n2 = n/2;

  if (n % 2 != 0)
    {
      mp_ptr qp1 = qp + 1;
      qhl = mpn_dc_div_3_by_2 (qp1 + n2, np + 2 + n2, dp + 1, n2, scratch);
      qhl += mpn_add_1 (qp1 + n2, qp1 + n2, n2,
			mpn_dc_div_3_by_2 (qp1, np + 2, dp + 1, n2, scratch));

      cc = mpn_submul_1 (np + 1, qp1, n - 1, dp[0]);
      cc = mpn_sub_1 (np + n, np + n, 1, cc);
      if (qhl != 0)
	cc += mpn_sub_1 (np + n, np + n, 1, dp[0]);
      while (cc != 0)
	{
	  qhl -= mpn_sub_1 (qp1, qp1, n - 1, (mp_limb_t) 1);
	  cc -= mpn_add_n (np + 1, np + 1, dp, n);
	}
      qhl += mpn_add_1 (qp1, qp1, n - 1,
			mpn_sb_divrem_mn (qp, np, n + 1, dp, n));
    }
  else
    {
      qhl = mpn_dc_div_3_by_2 (qp + n2, np + n2, dp, n2, scratch);
      qhl += mpn_add_1 (qp + n2, qp + n2, n2,
			mpn_dc_div_3_by_2 (qp, np, dp, n2, scratch));
    }
  return qhl;
}
コード例 #10
0
ファイル: toom63_mul.c プロジェクト: bngabonziza/miktex
static int
abs_sub_add_n (mp_ptr rm, mp_ptr rp, mp_srcptr rs, mp_size_t n) {
  int result;
  result = abs_sub_n (rm, rp, rs, n);
  ASSERT_NOCARRY(mpn_add_n (rp, rp, rs, n));
  return result;
}
コード例 #11
0
ファイル: fastfp.c プロジェクト: SumanKNath/PBC.Net
static void fp_sub(element_ptr r, element_ptr a, element_ptr b) {
  fp_field_data_ptr p = r->field->data;
  size_t t = p->limbs;
  if (mpn_sub_n(r->data, a->data, b->data, t)) {
    mpn_add_n(r->data, r->data, p->primelimbs, t);
  }
}
コード例 #12
0
ファイル: dcpi1_div_qr.c プロジェクト: 119/aircam-openwrt
mp_limb_t
mpn_dcpi1_div_qr_n (mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t n,
		    gmp_pi1_t *dinv, mp_ptr tp)
{
  mp_size_t lo, hi;
  mp_limb_t cy, qh, ql;

  lo = n >> 1;			/* floor(n/2) */
  hi = n - lo;			/* ceil(n/2) */

  if (BELOW_THRESHOLD (hi, DC_DIV_QR_THRESHOLD))
    qh = mpn_sbpi1_div_qr (qp + lo, np + 2 * lo, 2 * hi, dp + lo, hi, dinv->inv32);
  else
    qh = mpn_dcpi1_div_qr_n (qp + lo, np + 2 * lo, dp + lo, hi, dinv, tp);

  mpn_mul (tp, qp + lo, hi, dp, lo);

  cy = mpn_sub_n (np + lo, np + lo, tp, n);
  if (qh != 0)
    cy += mpn_sub_n (np + n, np + n, dp, lo);

  while (cy != 0)
    {
      qh -= mpn_sub_1 (qp + lo, qp + lo, hi, 1);
      cy -= mpn_add_n (np + lo, np + lo, dp, n);
    }

  if (BELOW_THRESHOLD (lo, DC_DIV_QR_THRESHOLD))
    ql = mpn_sbpi1_div_qr (qp, np + hi, 2 * lo, dp + hi, lo, dinv->inv32);
  else
    ql = mpn_dcpi1_div_qr_n (qp, np + hi, dp + hi, lo, dinv, tp);

  mpn_mul (tp, dp, hi, qp, lo);

  cy = mpn_sub_n (np, np, tp, n);
  if (ql != 0)
    cy += mpn_sub_n (np + lo, np + lo, dp, hi);

  while (cy != 0)
    {
      mpn_sub_1 (qp, qp, lo, 1);
      cy -= mpn_add_n (np, np, dp, n);
    }

  return qh;
}
コード例 #13
0
ファイル: mul_n.c プロジェクト: Masuzu/RumourPropagation
static void	mpn_karasub(mp_ptr rp,mp_ptr tp,mp_size_t n)
{mp_size_t n2,n3;mp_limb_t c1,c2,c3,top[2];

n2=n>>1;n3=n-n2;
c1=mpn_sub_n(tp,rp+2*n2,tp,2*n2);//c1=mpn_sub_n(tp,rp+2*n2,tp,2*n3);
c2=mpn_add_n(tp,tp,rp,2*n2);
c3=mpn_add_n(rp+n2,rp+n2,tp,2*n2);//c3=mpn_add_n(rp+n2,rp+n2,tp,2*n3);
top[1]=rp[2*n2+2*n3-1];top[0]=rp[2*n2+2*n3-2];
mpn_incr_u(rp+3*n2,c3);//mpn_incr_u(rp+n2+2*n3,c3);
mpn_incr_u(rp+3*n2,c2);
mpn_decr_u(rp+3*n2,c1);//mpn_decr_u(rp+n2+2*n3,c1);
if(n2==n3)return;
c1=mpn_sub_n(rp+3*n2,rp+3*n2,tp+2*n2,2);
c2=mpn_add_n(rp+3*n2,rp+3*n2,top,2);
if(c2==1 && c1==0)mpn_incr_u(rp+3*n2+2,1);
if(c2==0 && c1==1)mpn_decr_u(rp+3*n2+2,1);
return;}
コード例 #14
0
ファイル: lucnum2_ui.c プロジェクト: qsnake/mpir
void
mpz_lucnum2_ui (mpz_ptr ln, mpz_ptr lnsub1, unsigned long n)
{
  mp_ptr     lp, l1p, f1p;
  mp_size_t  size;
  mp_limb_t  c;
  TMP_DECL;

  ASSERT (ln != lnsub1);

  /* handle small n quickly, and hide the special case for L[-1]=-1 */
  if (n <= FIB_TABLE_LUCNUM_LIMIT)
    {
      mp_limb_t  f  = FIB_TABLE (n);
      mp_limb_t  f1 = FIB_TABLE ((int) n - 1);

      /* L[n] = F[n] + 2F[n-1] */
      PTR(ln)[0] = f + 2*f1;
      SIZ(ln) = 1;

      /* L[n-1] = 2F[n] - F[n-1], but allow for L[-1]=-1 */
      PTR(lnsub1)[0] = (n == 0 ? 1 : 2*f - f1);
      SIZ(lnsub1) = (n == 0 ? -1 : 1);

      return;
    }

  TMP_MARK;
  size = MPN_FIB2_SIZE (n);
  f1p = TMP_ALLOC_LIMBS (size);

  MPZ_REALLOC (ln,     size+1);
  MPZ_REALLOC (lnsub1, size+1);
  lp  = PTR(ln);
  l1p = PTR(lnsub1);

  size = mpn_fib2_ui (l1p, f1p, n);

  /* L[n] = F[n] + 2F[n-1] */
#if HAVE_NATIVE_mpn_addlsh1_n
  c = mpn_addlsh1_n (lp, l1p, f1p, size);
#else
  c = mpn_lshift1 (lp, f1p, size);
  c += mpn_add_n (lp, lp, l1p, size);
#endif
  lp[size] = c;
  SIZ(ln) = size + (c != 0);

  /* L[n-1] = 2F[n] - F[n-1] */
  c = mpn_double (l1p, size);
  c -= mpn_sub_n (l1p, l1p, f1p, size);
  ASSERT ((mp_limb_signed_t) c >= 0);
  l1p[size] = c;
  SIZ(lnsub1) = size + (c != 0);

  TMP_FREE;
}
コード例 #15
0
ファイル: fastfp.c プロジェクト: SumanKNath/PBC.Net
static int fp_sgn_even(element_ptr a) {
  fp_field_data_ptr p = a->field->data;
  if (fp_is0(a)) return 0;
  mp_limb_t *sum = _alloca(p->limbs * sizeof(mp_limb_t));

  int carry = mpn_add_n(sum, a->data, a->data, p->limbs);
  if (carry) return 1;
  return mpn_cmp(sum, p->primelimbs, p->limbs);
}
コード例 #16
0
ファイル: t-matrix22.c プロジェクト: RodneyBates/M3Devel
static void
ref_matrix22_mul (struct matrix *R,
		  const struct matrix *A,
		  const struct matrix *B, mp_ptr tp)
{
  mp_size_t an, bn, n;
  mp_ptr r00, r01, r10, r11, a00, a01, a10, a11, b00, b01, b10, b11;

  if (A->n >= B->n)
    {
      r00 = R->e00; a00 = A->e00; b00 = B->e00;
      r01 = R->e01; a01 = A->e01; b01 = B->e01;
      r10 = R->e10; a10 = A->e10; b10 = B->e10;
      r11 = R->e11; a11 = A->e11; b11 = B->e11;
      an = A->n, bn = B->n;
    }
  else
    {
      /* Transpose */
      r00 = R->e00; a00 = B->e00; b00 = A->e00;
      r01 = R->e10; a01 = B->e10; b01 = A->e10;
      r10 = R->e01; a10 = B->e01; b10 = A->e01;
      r11 = R->e11; a11 = B->e11; b11 = A->e11;
      an = B->n, bn = A->n;
    }
  n = an + bn;
  R->n = n + 1;

  mpn_mul (r00, a00, an, b00, bn);
  mpn_mul (tp, a01, an, b10, bn);
  r00[n] = mpn_add_n (r00, r00, tp, n);

  mpn_mul (r01, a00, an, b01, bn);
  mpn_mul (tp, a01, an, b11, bn);
  r01[n] = mpn_add_n (r01, r01, tp, n);

  mpn_mul (r10, a10, an, b00, bn);
  mpn_mul (tp, a11, an, b10, bn);
  r10[n] = mpn_add_n (r10, r10, tp, n);

  mpn_mul (r11, a10, an, b01, bn);
  mpn_mul (tp, a11, an, b11, bn);
  r11[n] = mpn_add_n (r11, r11, tp, n);
}
コード例 #17
0
ファイル: fastfp.c プロジェクト: SumanKNath/PBC.Net
static void fp_add(element_ptr r, element_ptr a, element_ptr b) {
  fp_field_data_ptr p = r->field->data;
  const size_t t = p->limbs;
  mp_limb_t carry;
  carry = mpn_add_n(r->data, a->data, b->data, t);

  if (carry || mpn_cmp(r->data, p->primelimbs, t) >= 0) {
    mpn_sub_n(r->data, r->data, p->primelimbs, t);
  }
}
コード例 #18
0
ファイル: ecc-modp.c プロジェクト: Distrotech/nettle
void
ecc_modp_add (const struct ecc_curve *ecc, mp_limb_t *rp,
	      const mp_limb_t *ap, const mp_limb_t *bp)
{
  mp_limb_t cy;
  cy = mpn_add_n (rp, ap, bp, ecc->size);
  cy = cnd_add_n (cy, rp, ecc->Bmodp, ecc->size);
  cy = cnd_add_n (cy, rp, ecc->Bmodp, ecc->size);
  assert (cy == 0);  
}
コード例 #19
0
ファイル: mullow_n.c プロジェクト: angavrilov/ecl-sse
void
mpn_mullow_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
  if (BELOW_THRESHOLD (n, MULLOW_BASECASE_THRESHOLD))
    {
      /* Allocate workspace of fixed size on stack: fast! */
      mp_limb_t ws[MUL_BASECASE_ALLOC];
      mpn_mul_basecase (ws, xp, n, yp, n);
      MPN_COPY (rp, ws, n);
    }
  else if (BELOW_THRESHOLD (n, MULLOW_DC_THRESHOLD))
    {
      mpn_mullow_basecase (rp, xp, yp, n);
    }
  else if (BELOW_THRESHOLD (n, MULLOW_MUL_N_THRESHOLD))
    {
      /* Divide-and-conquer */
      mp_size_t n2 = n >> 1;		/* floor(n/2) */
      mp_size_t n1 = n - n2;		/* ceil(n/2) */
      mp_ptr tp;
      TMP_SDECL;
      TMP_SMARK;
      tp = TMP_SALLOC_LIMBS (n1);

      /* Split as x = x1 2^(n1 GMP_NUMB_BITS) + x0,
                  y = y1 2^(n2 GMP_NUMB_BITS) + y0 */

      /* x0 * y0 */
      mpn_mul_n (rp, xp, yp, n2);
      if (n1 != n2)
	rp[2 * n2] = mpn_addmul_1 (rp + n2, yp, n2, xp[n2]);

      /* x1 * y0 * 2^(n1 GMP_NUMB_BITS) */
      mpn_mullow_n (tp, xp + n1, yp, n2);
      mpn_add_n (rp + n1, rp + n1, tp, n2);

      /* x0 * y1 * 2^(n2 GMP_NUMB_BITS) */
      mpn_mullow_n (tp, yp + n2, xp, n1);
      mpn_add_n (rp + n2, rp + n2, tp, n1);
      TMP_SFREE;
    }
  else
    {
コード例 #20
0
ファイル: matrix22_mul.c プロジェクト: BrianGladman/mpir
void
mpn_matrix22_mul (mp_ptr r0, mp_ptr r1, mp_ptr r2, mp_ptr r3, mp_size_t rn,
		  mp_srcptr m0, mp_srcptr m1, mp_srcptr m2, mp_srcptr m3, mp_size_t mn,
		  mp_ptr tp)
{
  if (BELOW_THRESHOLD (rn, MATRIX22_STRASSEN_THRESHOLD)
      || BELOW_THRESHOLD (mn, MATRIX22_STRASSEN_THRESHOLD))
    {
      mp_ptr p0, p1;
      unsigned i;

      /* Temporary storage: 3 rn + 2 mn */
      p0 = tp + rn;
      p1 = p0 + rn + mn;

      for (i = 0; i < 2; i++)
	{
	  MPN_COPY (tp, r0, rn);

	  if (rn >= mn)
	    {
	      mpn_mul (p0, r0, rn, m0, mn);
	      mpn_mul (p1, r1, rn, m3, mn);
	      mpn_mul (r0, r1, rn, m2, mn);
	      mpn_mul (r1, tp, rn, m1, mn);
	    }
	  else
	    {
	      mpn_mul (p0, m0, mn, r0, rn);
	      mpn_mul (p1, m3, mn, r1, rn);
	      mpn_mul (r0, m2, mn, r1, rn);
	      mpn_mul (r1, m1, mn, tp, rn);
	    }
	  r0[rn+mn] = mpn_add_n (r0, r0, p0, rn + mn);
	  r1[rn+mn] = mpn_add_n (r1, r1, p1, rn + mn);

	  r0 = r2; r1 = r3;
	}
    }
  else
    mpn_matrix22_mul_strassen (r0, r1, r2, r3, rn,
			       m0, m1, m2, m3, mn, tp);
}
コード例 #21
0
static mp_limb_t
DO_mpn_addlsh_n(mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
{
#if USE_MUL_1 && 0
  return mpn_addmul_1(dst,src,n,CNST_LIMB(1) <<(s));
#else
  mp_limb_t __cy;
  __cy = mpn_lshift(ws,src,n,s);
  return    __cy + mpn_add_n(dst,dst,ws,n);
#endif
}
コード例 #22
0
ファイル: t-mul.c プロジェクト: HRF92/mpir
static void
ref_mpn_mul (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
{
  mp_ptr tp;
  mp_size_t tn;
  mp_limb_t cy;

  if (vn < TOOM3_THRESHOLD)
    {
      /* In the mpn_mul_basecase and mpn_kara_mul_n range, use our own
	 mul_basecase.  */
      if (vn != 0)
	mul_basecase (wp, up, un, vp, vn);
      else
	MPN_ZERO (wp, un);
      return;
    }

  if (vn < FFT_THRESHOLD)
    {
      /* In the mpn_toom3_mul_n and mpn_toom4_mul_n range, use mpn_kara_mul_n.  */
      tn = 2 * vn + MPN_KARA_MUL_N_TSIZE (vn);
      tp = __GMP_ALLOCATE_FUNC_LIMBS (tn);
      mpn_kara_mul_n (tp, up, vp, vn, tp + 2 * vn);
    }
  else
    {
      /* Finally, for the largest operands, use mpn_toom3_mul_n.  */
      /* The "- 63 + 255" tweaks the allocation to allow for huge operands.
	 See the definition of this macro in gmp-impl.h to understand this.  */
      tn = 2 * vn + MPN_TOOM3_MUL_N_TSIZE (vn) - 63 + 255;
      tp = __GMP_ALLOCATE_FUNC_LIMBS (tn);
      mpn_toom3_mul_n (tp, up, vp, vn, tp + 2 * vn);
    }

  if (un != vn)
    {
      if (un - vn < vn)
	ref_mpn_mul (wp + vn, vp, vn, up + vn, un - vn);
      else
	ref_mpn_mul (wp + vn, up + vn, un - vn, vp, vn);

      MPN_COPY (wp, tp, vn);
      cy = mpn_add_n (wp + vn, wp + vn, tp + vn, vn);
      mpn_incr_u (wp + 2 * vn, cy);
    }
  else
    {
      MPN_COPY (wp, tp, 2 * vn);
    }

  __GMP_FREE_FUNC_LIMBS (tp, tn);
}
コード例 #23
0
ファイル: matrix22_mul.c プロジェクト: BrianGladman/mpir
static int
add_signed_n (mp_ptr rp,
	      mp_srcptr ap, int as, mp_srcptr bp, int bs, mp_size_t n)
{
  if (as != bs)
    return as ^ abs_sub_n (rp, ap, bp, n);
  else
    {
      ASSERT_NOCARRY (mpn_add_n (rp, ap, bp, n));
      return as;
    }
}
コード例 #24
0
ファイル: fastfp.c プロジェクト: SumanKNath/PBC.Net
static void fp_halve(element_ptr r, element_ptr a) {
  fp_field_data_ptr p = r->field->data;
  const size_t t = p->limbs;
  int carry = 0;
  mp_limb_t *alimb = a->data;
  mp_limb_t *rlimb = r->data;
  if (alimb[0] & 1) carry = mpn_add_n(rlimb, alimb, p->primelimbs, t);
  else fp_set(r, a);

  mpn_rshift(rlimb, rlimb, t, 1);
  if (carry) rlimb[t - 1] |= ((mp_limb_t) 1) << (sizeof(mp_limb_t) * 8 - 1);
}
コード例 #25
0
ファイル: addmul_1.c プロジェクト: macssh/macssh
mp_limb_t
mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t limb)
{
  mp_ptr p0, p1, tp;
  mp_limb_t cy_limb;
  TMP_DECL (marker);
  TMP_MARK (marker);

  p1 = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
  p0 = TMP_ALLOC (n * BYTES_PER_MP_LIMB);
  tp = TMP_ALLOC (n * BYTES_PER_MP_LIMB);

  GMPN_MULWW (p1, p0, up, &n, &limb);
  cy_limb = mpn_add_n (tp, rp, p0, n);
  rp[0] = tp[0];
  cy_limb += mpn_add_n (rp + 1, tp + 1, p1, n - 1);
  cy_limb += p1[n - 1];

  TMP_FREE (marker);
  return cy_limb;
}
コード例 #26
0
ファイル: t-bdiv.c プロジェクト: AllardJ/Tomato
void
check_one (mp_ptr qp, mp_srcptr rp, mp_limb_t rh,
	   mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, const char *fname)
{
  mp_size_t qn;
  int cmp;
  mp_ptr tp;
  mp_limb_t cy = 4711;		/* silence warnings */
  TMP_DECL;

  qn = nn - dn;

  if (qn == 0)
    return;

  TMP_MARK;

  tp = TMP_ALLOC_LIMBS (nn + 1);

  if (dn >= qn)
    mpn_mul (tp, dp, dn, qp, qn);
  else
    mpn_mul (tp, qp, qn, dp, dn);

  if (rp != NULL)
    {
      cy = mpn_add_n (tp + qn, tp + qn, rp, dn);
      cmp = cy != rh || mpn_cmp (tp, np, nn) != 0;
    }
  else
    cmp = mpn_cmp (tp, np, nn - dn) != 0;

  if (cmp != 0)
    {
      printf ("\r*******************************************************************************\n");
      printf ("%s inconsistent in test %lu\n", fname, test);
      printf ("N=   "); dumpy (np, nn);
      printf ("D=   "); dumpy (dp, dn);
      printf ("Q=   "); dumpy (qp, qn);
      if (rp != NULL)
	{
	  printf ("R=   "); dumpy (rp, dn);
	  printf ("Rb=  %d, Cy=%d\n", (int) cy, (int) rh);
	}
      printf ("T=   "); dumpy (tp, nn);
      printf ("nn = %ld, dn = %ld, qn = %ld", nn, dn, qn);
      printf ("\n*******************************************************************************\n");
      abort ();
    }

  TMP_FREE;
}
コード例 #27
0
ファイル: toom4_mul_n.c プロジェクト: BrianGladman/mpir
void tc4_copy (mp_ptr yp, mp_size_t * yn, mp_size_t offset, mp_srcptr xp, mp_size_t xn)
{
  mp_size_t yu = ABS(*yn);
  mp_size_t xu = ABS(xn);
  mp_limb_t cy = 0;

  if (xn == 0)
    return;

  if (offset < yu) /* low part of x overlaps with y */
  {
      if (offset + xu <= yu) /* x entirely inside y */
      {
          cy = mpn_add_n (yp + offset, yp + offset, xp, xu);
          if (offset + xu < yu)
            cy = mpn_add_1 (yp + offset + xu, yp + offset + xu,
                            yu - (offset + xu), cy);
      } else
        cy = mpn_add_n (yp + offset, yp + offset, xp, yu - offset);
      /* now cy is the carry at yp + yu */
      if (xu + offset > yu) /* high part of x exceeds y */
      {
          MPN_COPY (yp + yu, xp + yu - offset, xu + offset - yu);
          cy = mpn_add_1 (yp + yu, yp + yu, xu + offset - yu, cy);
          yu = xu + offset;
      }
      /* now cy is the carry at yp + yn */
      if (cy)
        yp[yu++] = cy;
      MPN_NORMALIZE(yp, yu);
      *yn = yu;
  } else /* x does not overlap */
  {
      if (offset > yu)
        MPN_ZERO (yp + yu, offset - yu);
      MPN_COPY (yp + offset, xp, xu);
      *yn = offset + xu;
  }
}
コード例 #28
0
/* Input is {ap,rn}; output is {rp,rn}, computation is
   mod B^rn - 1, and values are semi-normalised; zero is represented
   as either 0 or B^n - 1.  Needs a scratch of 2rn limbs at tp.
   tp==rp is allowed. */
static void
mpn_bc_sqrmod_bnm1 (mp_ptr rp, mp_srcptr ap, mp_size_t rn, mp_ptr tp)
{
  mp_limb_t cy;

  ASSERT (0 < rn);

  mpn_sqr (tp, ap, rn);
  cy = mpn_add_n (rp, tp, tp + rn, rn);
  /* If cy == 1, then the value of rp is at most B^rn - 2, so there can
   * be no overflow when adding in the carry. */
  MPN_INCR_U (rp, rn, cy);
}
コード例 #29
0
ファイル: invert.c プロジェクト: AlexeiSheplyakov/gmp.pkg
void
mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
{
  ASSERT (n > 0);
  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));

  if (n == 1)
    invert_limb (*ip, *dp);
  else {
    TMP_DECL;

    TMP_MARK;
    if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD))
      {
	/* Maximum scratch needed by this branch: 2*n */
	mp_size_t i;
	mp_ptr xp;

	xp = scratch;				/* 2 * n limbs */
	for (i = n - 1; i >= 0; i--)
	  xp[i] = GMP_NUMB_MAX;
	mpn_com (xp + n, dp, n);
	if (n == 2) {
	  mpn_divrem_2 (ip, 0, xp, 4, dp);
	} else {
	  gmp_pi1_t inv;
	  invert_pi1 (inv, dp[n-1], dp[n-2]);
	  /* FIXME: should we use dcpi1_div_q, for big sizes? */
	  mpn_sbpi1_div_q (ip, xp, 2 * n, dp, n, inv.inv32);
	}
      }
    else { /* Use approximated inverse; correct the result if needed. */
      mp_limb_t e; /* The possible error in the approximate inverse */

      ASSERT ( mpn_invert_itch (n) >= mpn_invertappr_itch (n) );
      e = mpn_ni_invertappr (ip, dp, n, scratch);

      if (UNLIKELY (e)) { /* Assume the error can only be "0" (no error) or "1". */
	/* Code to detect and correct the "off by one" approximation. */
	mpn_mul_n (scratch, ip, dp, n);
	ASSERT_NOCARRY (mpn_add_n (scratch + n, scratch + n, dp, n));
	if (! mpn_add (scratch, scratch, 2*n, dp, n))
	  MPN_INCR_U (ip, n, 1); /* The value was wrong, correct it.  */
      }
    }
    TMP_FREE;
  }
}
コード例 #30
0
ファイル: invert.c プロジェクト: AaronNGray/texlive-libs
void
mpn_invert (mp_ptr ip, mp_srcptr dp, mp_size_t n, mp_ptr scratch)
{
  ASSERT (n > 0);
  ASSERT (dp[n-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! MPN_OVERLAP_P (ip, n, dp, n));
  ASSERT (! MPN_OVERLAP_P (ip, n, scratch, mpn_invertappr_itch(n)));
  ASSERT (! MPN_OVERLAP_P (dp, n, scratch, mpn_invertappr_itch(n)));

  if (n == 1)
    invert_limb (*ip, *dp);
  else if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD))
    {
	/* Maximum scratch needed by this branch: 2*n */
	mp_size_t i;
	mp_ptr xp;

	xp = scratch;				/* 2 * n limbs */
	/* n > 1 here */
	i = n;
	do
	  xp[--i] = GMP_NUMB_MAX;
	while (i);
	mpn_com (xp + n, dp, n);
	if (n == 2) {
	  mpn_divrem_2 (ip, 0, xp, 4, dp);
	} else {
	  gmp_pi1_t inv;
	  invert_pi1 (inv, dp[n-1], dp[n-2]);
	  /* FIXME: should we use dcpi1_div_q, for big sizes? */
	  mpn_sbpi1_div_q (ip, xp, 2 * n, dp, n, inv.inv32);
	}
    }
  else { /* Use approximated inverse; correct the result if needed. */
      mp_limb_t e; /* The possible error in the approximate inverse */

      ASSERT ( mpn_invert_itch (n) >= mpn_invertappr_itch (n) );
      e = mpn_ni_invertappr (ip, dp, n, scratch);

      if (UNLIKELY (e)) { /* Assume the error can only be "0" (no error) or "1". */
	/* Code to detect and correct the "off by one" approximation. */
	mpn_mul_n (scratch, ip, dp, n);
	e = mpn_add_n (scratch, scratch, dp, n); /* FIXME: we only need e.*/
	if (LIKELY(e)) /* The high part can not give a carry by itself. */
	  e = mpn_add_nc (scratch + n, scratch + n, dp, n, e); /* FIXME:e */
	/* If the value was wrong (no carry), correct it (increment). */
	e ^= CNST_LIMB (1);
	MPN_INCR_U (ip, n, e);
      }
  }
}