Example #1
mp_limb_t divexact_submul(mp_ptr qp, mp_ptr xp, mp_size_t n)
{
   mp_size_t j;
   mp_limb_t c, m, t1, t2, t3, acc, ax, dx, t;

   ASSERT(n > 0);
   ASSERT_MPN(xp, n);
   ASSERT(MPN_SAME_OR_SEPARATE_P(qp, xp, n));

   m = 0;
   m = ~m;
   m = m/3; /* m = (B - 1)/3 */

   c = 0;
   t1 = t2 = t3 = acc = 0;

   umul_ppmm(dx, ax, xp[0], m);
   SUB(c, acc, 0, t1);
   ADC(c, t2, 0, ax, c);
   ADC(c, t3, 0, dx, c);
   ASSERT(c == 0);
   t1 = t2;
   t2 = t3;

   for (j = 1; j <= n - 1; j++)
   {
      t3 = 0;
      umul_ppmm(dx, ax, xp[j], m);
      SUB(c, acc, acc, t1);
      qp[j - 1] = acc;
      ADC(c, t2, t2, ax, c);
      ADC(c, t3, t3, dx, c);
      ASSERT(c == 0);
      t1 = t2;
      t2 = t3;
   }

   SUB(c, acc, acc, t1);
   qp[n - 1] = acc;
   ADC(c, t2, t2, 0, c);
   t = (t2 - acc)*3;

   /* return next quotient*(-3), so (xp, n) = (qp, n)*3 - ret*B^n and 0 <= ret < 3 */
   return t;
}
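Why m = (B - 1)/3 works: 3m = B - 1, so 3m wraps to -1 mod B and m acts as -1/3 mod B; one multiply per limb then replaces a division. A minimal single-limb sketch, assuming a 64-bit limb (the constants and the main() harness are illustrative, not part of the routine above):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t m = UINT64_MAX / 3;            /* m = (B - 1)/3 = 0x5555555555555555 */
    uint64_t inv3 = 0xAAAAAAAAAAAAAAABULL;  /* 1/3 mod 2^64: 3*inv3 wraps to 1 */
    uint64_t q = 12345678901234567ULL;
    uint64_t x = 3*q;                       /* an exact multiple of 3 */

    printf("%d\n", m == (uint64_t)0 - inv3); /* m is -1/3 mod B: prints 1 */
    printf("%d\n", x*inv3 == q);             /* exact division by 3: prints 1 */
    return 0;
}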
Example #2
/* (rp, 2n) = (xp, n)*(yp, n) / B^n */ 
inline static void
mpn_mulshort_n_basecase(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
  mp_size_t i, k;

#if GMP_NAIL_BITS==0
  mp_limb_t t1, t2, t3;
#endif

  ASSERT(n >= 3);  /* this restriction doesn't make a lot of sense in general */
  ASSERT_MPN(xp, n);
  ASSERT_MPN(yp, n);
  ASSERT(!MPN_OVERLAP_P (rp, 2 * n, xp, n));
  ASSERT(!MPN_OVERLAP_P (rp, 2 * n, yp, n));

  k = n - 2; /* so want short product sum_(i + j >= k) x[i]y[j]B^(i + j) */

#if GMP_NAIL_BITS!=0
  rp[n] = mpn_mul_1(rp + k, xp + k, 2, yp[0]);
#else

  umul_ppmm(t1, rp[k], xp[k], yp[0]);
  umul_ppmm(t3, t2, xp[k + 1], yp[0]);
  add_ssaaaa(rp[n], rp[k + 1], t3, t2, 0, t1);
#endif

  for (i = 1; i <= n - 2; i++)
     rp[n + i] = mpn_addmul_1 (rp + k, xp + k - i, 2 + i, yp[i]);
  
  rp[n + n - 1] = mpn_addmul_1 (rp + n - 1, xp, n, yp[n - 1]);
  
  return;
}
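The comment at the top is the whole contract: only terms x[i]*y[j] with i + j >= n - 2 are accumulated, so the top half comes out at most a few ulps below the exact high product. A sanity-check sketch, assuming the routine above is visible in the same translation unit and n <= 64 (the harness itself is hypothetical):

#include <gmp.h>
#include <assert.h>

/* compare the short product against the full one for a single input pair */
static void check_mulshort(mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
    mp_limb_t full[128], shrt[128];           /* assumes n <= 64 */

    mpn_mul_n(full, xp, yp, n);               /* exact 2n-limb product */
    mpn_mulshort_n_basecase(shrt, xp, yp, n); /* top-half approximation */

    /* the approximation never exceeds the true high half */
    assert(mpn_cmp(shrt + n, full + n, n) <= 0);
}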
Example #3
File: t-mul.c Project: HRF92/mpir
static void
mul_basecase (mp_ptr wp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
{
  mp_size_t i, j;
  mp_limb_t prod_low, prod_high;
  mp_limb_t cy_dig;
  mp_limb_t v_limb;

  /* Multiply by the first limb in V separately, as the result can
     be stored (not added) to PROD.  We also avoid a loop for zeroing.  */
  v_limb = vp[0];
  cy_dig = 0;
  for (j = un; j > 0; j--)
    {
      mp_limb_t u_limb, w_limb;
      u_limb = *up++;
      umul_ppmm (prod_high, prod_low, u_limb, v_limb << GMP_NAIL_BITS);
      add_ssaaaa (cy_dig, w_limb, prod_high, prod_low, 0, cy_dig << GMP_NAIL_BITS);
      *wp++ = w_limb >> GMP_NAIL_BITS;
    }

  *wp++ = cy_dig;
  wp -= un;
  up -= un;

  /* For each iteration in the outer loop, multiply one limb from
     U with one limb from V, and add it to PROD.  */
  for (i = 1; i < vn; i++)
    {
      v_limb = vp[i];
      cy_dig = 0;

      for (j = un; j > 0; j--)
	{
	  mp_limb_t u_limb, w_limb;
	  u_limb = *up++;
	  umul_ppmm (prod_high, prod_low, u_limb, v_limb << GMP_NAIL_BITS);
	  w_limb = *wp;
	  add_ssaaaa (prod_high, prod_low, prod_high, prod_low, 0, w_limb << GMP_NAIL_BITS);
	  prod_low >>= GMP_NAIL_BITS;
	  prod_low += cy_dig;
#if GMP_NAIL_BITS == 0
	  cy_dig = prod_high + (prod_low < cy_dig);
#else
	  cy_dig = prod_high;
	  cy_dig += prod_low >> GMP_NUMB_BITS;
#endif
	  *wp++ = prod_low & GMP_NUMB_MASK;
	}

      *wp++ = cy_dig;
      wp -= un;
      up -= un;
    }
}
Example #4
int
_nmod_vec_dot_bound_limbs(slong len, nmod_t mod)
{
    mp_limb_t t2, t1, t0, u1, u0;

    umul_ppmm(t1, t0, mod.n - 1, mod.n - 1);
    umul_ppmm(t2, t1, t1, len);
    umul_ppmm(u1, u0, t0, len);
    add_sssaaaaaa(t2, t1, t0,  t2, t1, UWORD(0),  UWORD(0), u1, u0);

    if (t2 != 0) return 3;
    if (t1 != 0) return 2;
    return (t0 != 0);
}
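The three umul_ppmm calls together with add_sssaaaaaa form the three-limb bound len*(mod.n - 1)^2 on a dot product of reduced entries; the return value is how many limbs that bound occupies, which callers use to pick an accumulation strategy. A hypothetical driver, assuming FLINT's headers:

#include <flint/flint.h>
#include <flint/nmod_vec.h>

int main(void)
{
    nmod_t mod;
    nmod_init(&mod, UWORD(1) << 40);   /* a 41-bit modulus, 2^40 */

    /* 1000*(mod.n - 1)^2 is about 2^90, i.e. two limbs: prints 2 */
    flint_printf("%d\n", _nmod_vec_dot_bound_limbs(1000, mod));
    return 0;
}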
Example #5
void
_nmod_mat_mul_transpose_3(nmod_mat_t C, const nmod_mat_t A, const nmod_mat_t B)
{
    long i, j, k;

    register mp_limb_t s0, s1, s2;
    register mp_limb_t t0, t1;
    register mp_limb_t c1, c2;

    for (i = 0; i < A->r; i++)
    {
        for (j = 0; j < B->r; j++)
        {
            s0 = s1 = s2 = 0UL;

            for (k = 0; k < A->c; k++)
            {
                umul_ppmm(t1, t0, A->rows[i][k], B->rows[j][k]);
                add_ssaaaa(c1, s0, (mp_limb_t) 0, s0, (mp_limb_t) 0, t0);
                add_ssaaaa(c2, s1, (mp_limb_t) 0, s1, (mp_limb_t) 0, t1);
                add_ssaaaa(s2, s1, s2, s1, c2, c1);
            }

            NMOD_RED(s2, s2, C->mod);
            NMOD_RED3(s0, s2, s1, s0, C->mod);
            C->rows[i][j] = s0;
        }
    }
}
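The three add_ssaaaa calls implement a three-limb accumulator: c1 and c2 catch the carries out of s0 and s1, and the last call folds both into (s2, s1). The same step written with unsigned __int128, purely as an illustration (assumes a 64-bit limb; not FLINT code):

#include <stdint.h>

/* (s2, s1, s0) += (t1, t0), the step the three add_ssaaaa above perform */
static void acc3_add2(uint64_t *s2, uint64_t *s1, uint64_t *s0,
                      uint64_t t1, uint64_t t0)
{
    unsigned __int128 lo  = (unsigned __int128) *s0 + t0;
    unsigned __int128 mid = (unsigned __int128) *s1 + t1 + (uint64_t)(lo >> 64);
    *s0  = (uint64_t) lo;
    *s1  = (uint64_t) mid;
    *s2 += (uint64_t)(mid >> 64);
}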
Example #6
/*
   (xp, n) = (qp, n)*f - ret*B^n and 0 <= ret < f

   Note the divexact_by3 code is just a special case of this
*/
mp_limb_t mpn_divexact_byfobm1(mp_ptr qp, mp_srcptr xp, mp_size_t n,
                                             mp_limb_t f, mp_limb_t Bm1of)
{
   mp_size_t j;
   mp_limb_t c, acc, ax, dx;

   ASSERT(n > 0);
   ASSERT_MPN(xp, n);
   ASSERT(MPN_SAME_OR_SEPARATE_P(qp, xp, n));
   ASSERT(Bm1of*f + 1 == 0);

   acc = 0*Bm1of; /* carry in is 0 */

   for (j = 0; j <= n - 1; j++)
   {
      umul_ppmm(dx, ax, xp[j], Bm1of);
    
      SUBC_LIMB(c, acc, acc, ax);
    
      qp[j] = acc;
      acc -= dx + c;
   }

   /* return next quotient*(-f) */ 

   return acc*(-f);
}   
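The precondition Bm1of*f + 1 == 0 mod B forces Bm1of = (B - 1)/f, so f must divide B - 1; for a 64-bit limb, B - 1 = 2^64 - 1 = 3 * 5 * 17 * 257 * 641 * 65537 * 6700417, which is what makes f = 3, 5, 15, 17, 255, 257, ... usable. A minimal check of that precondition (illustrative harness, 64-bit limb assumed):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t f[] = { 3, 5, 15, 17, 255, 257 };

    for (int i = 0; i < 6; i++)
    {
        uint64_t bm1of = UINT64_MAX / f[i];  /* (B - 1)/f, exact for these f */
        printf("f = %llu: %d\n", (unsigned long long) f[i],
               bm1of*f[i] + 1 == 0);         /* prints 1: precondition holds */
    }
    return 0;
}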
Example #7
/* (xp, n) = (qp, n)*3 - ret*B^n and 0 <= ret < 3 */
mp_limb_t mpn_divexact_by3c(mp_ptr qp, mp_srcptr xp, mp_size_t n, mp_limb_t ci)
{
   mp_size_t j;
   mp_limb_t c, m, acc, ax, dx;

   ASSERT(n > 0);
   ASSERT_MPN(xp, n);
   ASSERT(MPN_SAME_OR_SEPARATE_P(qp, xp, n));

   m = 0;
   m = ~m;
   m = m/3; /* m = (B - 1)/3 */

   acc = ci*m;

   for (j = 0; j <= n - 1; j++)
   {
      umul_ppmm(dx, ax, xp[j], m);
    
      SUBC_LIMB(c, acc, acc, ax);
    
      qp[j] = acc;
      acc -= dx + c;
   }

   /* return next quotient*(-3) */    

   return acc*(-3);
}   
Example #8
void
_nmod_mat_mul_transpose_2(nmod_mat_t C, const nmod_mat_t A, const nmod_mat_t B)
{
    long i, j, k;

    register mp_limb_t s0, s1;
    register mp_limb_t t0, t1;

    for (i = 0; i < A->r; i++)
    {
        for (j = 0; j < B->r; j++)
        {
            s0 = s1 = 0UL;

            for (k = 0; k < A->c; k++)
            {
                umul_ppmm(t1, t0, A->rows[i][k], B->rows[j][k]);
                add_ssaaaa(s1, s0, s1, s0, t1, t0);
            }

            NMOD2_RED2(s0, s1, s0, C->mod);
            C->rows[i][j] = s0;
        }
    }
}
Example #9
mp_limb_t
mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size, mp_limb_t d, mp_limb_t h)
{
  mp_limb_t  s, x, y, inverse, dummy, dmul, c1, c2;
  mp_limb_t  c = 0;
  mp_size_t  i;

  ASSERT (size >= 1);
  ASSERT (d & 1);

  binvert_limb (inverse, d);
  dmul = d << GMP_NAIL_BITS;

  for (i = 0; i < size; i++)
    {
      ASSERT (c==0 || c==1);

      s = src[i];
      SUBC_LIMB (c1, x, s, c);

      SUBC_LIMB (c2, y, x, h);
      c = c1 + c2;

      y = (y * inverse) & GMP_NUMB_MASK;
      umul_ppmm (h, dummy, y, dmul);
    }

  h += c;
  return h;
}
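binvert_limb supplies the inverse of the odd divisor modulo B that drives the exact-remainder loop. One standard way to compute such an inverse (a sketch of the idea, not GMP's exact table-driven implementation) is Newton/Hensel lifting, which doubles the number of correct low bits at every step:

#include <stdint.h>

/* inverse of an odd d modulo 2^64; d*binv64(d) == 1 (mod 2^64) */
static uint64_t binv64(uint64_t d)
{
    uint64_t x = d;        /* d*d == 1 (mod 8), so x is correct to 3 bits */
    x *= 2 - d * x;        /* correct to 6 bits  */
    x *= 2 - d * x;        /* 12 bits */
    x *= 2 - d * x;        /* 24 bits */
    x *= 2 - d * x;        /* 48 bits */
    x *= 2 - d * x;        /* 96 >= 64 bits */
    return x;
}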
Example #10
mp_limb_t div_preinv1(mp_limb_t d1, mp_limb_t d2)
{
   mp_limb_t q, r[2], p[2], cy;
   
   if (d2 + 1 == 0 && d1 + 1 == 0)
      return 0;

   if (d1 + 1 == 0)
      q = ~d1, r[1] = ~d2;
   else
      udiv_qrnnd(q, r[1], ~d1, ~d2, d1 + 1);

   r[0] = 0;

   if (d2 + 1 == 0)
      add_ssaaaa(cy, r[1], 0, r[1], 0, q);   
   else
   {
      umul_ppmm(p[1], p[0], q, ~d2 - 1);
      cy = mpn_add_n(r, r, p, 2);
   }
 
   p[0] = d2 + 1, p[1] = d1 + (d2 + 1 == 0);
   if (cy || mpn_cmp(r, p, 2) >= 0)
      q++;
   
   return q;
}
Example #11
mpi_limb_t
mpihelp_addmul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
		  mpi_size_t s1_size, mpi_limb_t s2_limb)
{
    mpi_limb_t cy_limb;
    mpi_size_t j;
    mpi_limb_t prod_high, prod_low;
    mpi_limb_t x;

    /* The loop counter and index J goes from -SIZE to -1.  This way
     * the loop becomes faster.  */
    j = -s1_size;
    res_ptr -= j;
    s1_ptr -= j;

    cy_limb = 0;
    do {
	umul_ppmm( prod_high, prod_low, s1_ptr[j], s2_limb );

	prod_low += cy_limb;
	cy_limb = (prod_low < cy_limb?1:0) + prod_high;

	x = res_ptr[j];
	prod_low = x + prod_low;
	cy_limb += prod_low < x?1:0;
	res_ptr[j] = prod_low;
    } while ( ++j );
    return cy_limb;
}
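This is the classic addmul_1 primitive: {res_ptr, s1_size} += {s1_ptr, s1_size} * s2_limb, with the limb that falls off the top returned as carry. GMP exposes the same operation publicly as mpn_addmul_1, so a usage sketch can be written against that (hypothetical harness):

#include <gmp.h>
#include <stdio.h>

int main(void)
{
    mp_limb_t acc[2] = { ~(mp_limb_t) 0, ~(mp_limb_t) 0 };  /* value B^2 - 1 */
    mp_limb_t src[2] = { 1, 0 };                            /* value 1 */
    mp_limb_t cy;

    cy = mpn_addmul_1(acc, src, 2, 1);  /* acc += 1*1, overflows the top */
    printf("%d %d %d\n", acc[0] == 0, acc[1] == 0, cy == 1); /* 1 1 1 */
    return 0;
}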
Example #12
void
nmod_mat_mul_check(nmod_mat_t C, const nmod_mat_t A, const nmod_mat_t B)
{
    long i, j, k;

    mp_limb_t s0, s1, s2;
    mp_limb_t t0, t1;

    for (i = 0; i < A->r; i++)
    {
        for (j = 0; j < B->c; j++)
        {
            s0 = s1 = s2 = 0UL;

            for (k = 0; k < A->c; k++)
            {
                umul_ppmm(t1, t0, A->rows[i][k], B->rows[k][j]);
                add_sssaaaaaa(s2, s1, s0, s2, s1, s0, 0, t1, t0);
            }

            NMOD_RED(s2, s2, C->mod);
            NMOD_RED3(s0, s2, s1, s0, C->mod);
            C->rows[i][j] = s0;
        }
    }
}
Example #13
mpi_limb_t
mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
		 mpi_size_t s1_size, mpi_limb_t s2_limb)
{
	mpi_limb_t cy_limb;
	mpi_size_t j;
	mpi_limb_t prod_high, prod_low;
	mpi_limb_t x;

	/* The loop counter and index J goes from -SIZE to -1.  This way
	 * the loop becomes faster.  */
	j = -s1_size;
	res_ptr -= j;
	s1_ptr -= j;

	cy_limb = 0;
	do {
		umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb);

		prod_low += cy_limb;
		cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high;

		x = res_ptr[j];
		prod_low = x + prod_low;
		cy_limb += prod_low < x ? 1 : 0;
		res_ptr[j] = prod_low;
	} while (++j);
	return cy_limb;
}
Example #14
void
mpn_sqr_basecase (mp_ptr rp, mp_srcptr up, mp_size_t n)
{
  mp_size_t i;
  mp_limb_t tarr[2 * SQR_KARATSUBA_THRESHOLD];
  mp_ptr tp = tarr;
  mp_limb_t cy;

  /* must fit 2*n limbs in tarr */
  ASSERT (n <= SQR_KARATSUBA_THRESHOLD);

  if ((n & 1) != 0)
    {
      if (n == 1)
	{
	  mp_limb_t ul, lpl;
	  ul = up[0];
	  umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
	  rp[0] = lpl >> GMP_NAIL_BITS;
	  return;
	}

      MPN_ZERO (tp, n);

      for (i = 0; i <= n - 2; i += 2)
	{
	  cy = mpn_addmul_2s (tp + 2 * i, up + i + 1, n - (i + 1), up + i);
	  tp[n + i] = cy;
	}
    }
Example #15
int main(void)
{
   int i, result;
   flint_rand_t state;
   
   printf("xgcd....");
   fflush(stdout);
   
   flint_randinit(state);

   for (i = 0; i < 100000; i++) 
   {
      mp_limb_t a, b, c, g, bits1, bits2, bits3, ph, pl, qh, ql;
      mp_limb_t s, t;
      
      bits1 = n_randint(state, FLINT_BITS-1) + 1;
      bits2 = n_randint(state, bits1) + 1;
      bits3 = n_randint(state, FLINT_BITS - bits1) + 1;

      do
      {
         a = n_randbits(state, bits1);
         b = n_randbits(state, bits2);
      } while ((n_gcd(a, b) != 1UL) || (b > a));

      c = n_randbits(state, bits3);

      g = n_xgcd(&s, &t, a*c, b*c);

      umul_ppmm(ph, pl, a*c, s);
      umul_ppmm(qh, ql, b*c, t);
      sub_ddmmss(ph, pl, ph, pl, qh, ql);
      
      result = ((g == c) && (ph == 0UL) && (pl == c));
      if (!result)
      {
         printf("FAIL:\n");
         printf("a = %lu, b = %lu, c = %lu, g = %lu, s = %lu, t = %lu\n", a, b, c, g, s, t); 
         abort();
      }
   }

   flint_randclear(state);

   printf("PASS\n");
   return 0;
}
Example #16
/* Return {xp, xn} mod p.
   Assume 2p < B where B = 2^GMP_NUMB_BITS.
   We first compute {xp, xn} / B^n mod p using Montgomery reduction,
   where the number N to factor has n limbs.
   Then we multiply by B^(n+1) mod p (precomputed) and divide by B mod p.
   Assume invm = -1/p mod B and Bpow = B^(n+1) mod p */
static mp_limb_t
ecm_mod_1 (mp_ptr xp, mp_size_t xn, mp_limb_t p, mp_size_t n,
           mp_limb_t invm, mp_limb_t Bpow)
{
  mp_limb_t q, cy, hi, lo, x0, x1;

  if (xn == 0)
    return 0;

  /* the code below assumes xn <= n+1, thus we call mpn_mod_1 otherwise,
     but this should never (or rarely) happen */
  if (xn > n + 1)
    return mpn_mod_1 (xp, xn, p);

  x0 = xp[0];
  cy = (mp_limb_t) 0;
  while (n-- > 0)
    {
      /* Invariant: cy is the input carry on xp[1], x0 is xp[0] */
      x1 = (xn > 1) ? xp[1] : 0;
      q = x0 * invm; /* q = -x0/p mod B */
      umul_ppmm (hi, lo, q, p); /* hi*B + lo = -x0 mod B */
      /* Add hi*B + lo to x1*B + x0. Since p <= B-2 we have
         hi*B + lo <= (B-1)(B-2) = B^2-3B+2, thus hi <= B-3 */
      hi += cy + (lo != 0); /* cannot overflow */
      x0 = x1 + hi;
      cy = x0 < hi;
      xn --;
      xp ++;
    }
  if (cy != 0)
    x0 -= p;
  /* now x0 = {xp, xn} / B^n mod p */
  umul_ppmm (x1, x0, x0, Bpow);
  /* since Bpow < p, x1 <= p-1 */
  q = x0 * invm;
  umul_ppmm (hi, lo, q, p);
  /* hi <= p-1 thus hi+x1+1 < 2p-1 < B */
  hi = hi + x1 + (lo != 0);
  while (hi >= p)
    hi -= p;
  return hi;
}
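Each pass of the while loop is a one-limb Montgomery (REDC) step: q = x0 * invm makes x0 + q*p divisible by B, so dropping the low limb divides by B without leaving the residue class mod p. The same step written with unsigned __int128, as an illustration only (64-bit limb, 2p < B assumed as in the comment above):

#include <stdint.h>

/* one REDC step: given x < B*p with p odd and invm = -1/p mod B,
   return (x/B) mod p, where B = 2^64 */
static uint64_t redc1(unsigned __int128 x, uint64_t p, uint64_t invm)
{
    uint64_t q = (uint64_t) x * invm;                     /* q = -x0/p mod B   */
    unsigned __int128 t = x + (unsigned __int128) q * p;  /* low limb is now 0 */
    uint64_t r = (uint64_t)(t >> 64);                     /* exact shift by B  */
    return r >= p ? r - p : r;
}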
Example #17
/*
 * Multiply x and y, reducing the result modulo n.
 */
uint64_t mul_mod_n(uint64_t x, uint64_t y, uint64_t n)
{
#if 0
	uint64_t q, r, p1, p2;
	umul_ppmm(p1, p2, x, y);
	udiv_qrnnd(q, r, p1, p2, n);
	return r;
#endif
	return (x * y) % n;
}
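As written, the live path (x * y) % n wraps the product modulo 2^64 before reducing, so it is only correct when the product fits in 64 bits; the disabled umul_ppmm/udiv_qrnnd path is the full-width variant. A portable alternative where the compiler provides unsigned __int128 (a sketch, not part of the original source):

#include <stdint.h>

uint64_t mul_mod_n_wide(uint64_t x, uint64_t y, uint64_t n)
{
	/* full 128-bit product, then a single 128-by-64-bit remainder */
	return (uint64_t)(((unsigned __int128) x * y) % n);
}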
Example #18
/* in each round we remove one limb from the body, i.e. k = 1 */
void mpn_mod_1_3(mp_ptr rem, mp_srcptr xp, mp_size_t xn, mp_srcptr db)
{
   mp_limb_t h, l, sh, sl, th, tl;
   mp_size_t j, jj;
 
   ASSERT(xn >= 5);
   ASSERT_MPN(xp, xn);
   ASSERT_LIMB(db[0]);
   ASSERT_LIMB(db[1]);
   ASSERT_LIMB(db[2]);
   ASSERT_LIMB(db[3]);
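   /* the entries of db are presumably the precomputed residues
      db[i] = B^(i+1) mod d for the divisor d, so each pass folds three
      body limbs and the two-limb carry (th, tl) down one weight level */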

   tl = xp[xn - 2];
   th = xp[xn - 1];

   for (j = xn - 5; j >= 0; j -= 3)
   {
      umul_ppmm(sh, sl, xp[j + 1], db[0]);
      add_ssaaaa(sh, sl, sh, sl, 0, xp[j]);
      umul_ppmm(h, l, xp[j + 2], db[1]);
      add_ssaaaa(sh, sl, sh, sl, h, l);
      umul_ppmm(h, l, tl, db[2]);
      add_ssaaaa(sh, sl, sh, sl, h, l);
      umul_ppmm(th, tl, th, db[3]);
      add_ssaaaa(th, tl, th, tl, sh, sl);
   }

   if (j > -3) /* we have at least three limbs to do, i.e. xp[0], ..., tl, th */
   {
      sh = 0;
      sl = xp[0];
      jj = 1;

      if (j == -1)
      {
         umul_ppmm(sh, sl, xp[1], db[0]);
         add_ssaaaa(sh, sl, sh, sl, 0, xp[0]);
         jj = 2;
      }

      umul_ppmm(h, l, tl, db[jj - 1]);
      add_ssaaaa(sh, sl, sh, sl, h, l);
      umul_ppmm(th, tl, th, db[jj]);
      add_ssaaaa(th, tl, th, tl, sh, sl);
   }

   umul_ppmm(h, l, th, db[0]);
   add_ssaaaa(h, l, h, l, 0, tl);

   rem[0] = l;
   rem[1] = h;
}
Example #19
/* Put in  rp[n..2n-1] an approximation of the n high limbs
   of {up, n} * {vp, n}. The error is less than n ulps of rp[n] (and the
   approximation is always less than or equal to the truncated full product).
   Assume 2n limbs are allocated at rp.

   Implements Algorithm ShortMulNaive from [1].
*/
static void
mpfr_mulhigh_n_basecase (mpfr_limb_ptr rp, mpfr_limb_srcptr up,
                         mpfr_limb_srcptr vp, mp_size_t n)
{
  mp_size_t i;

  rp += n - 1;
  umul_ppmm (rp[1], rp[0], up[n-1], vp[0]); /* we neglect up[0..n-2]*vp[0],
                                               which is less than B^n */
  for (i = 1 ; i < n ; i++)
    /* here, we neglect up[0..n-i-2] * vp[i], which is less than B^n too */
    rp[i + 1] = mpn_addmul_1 (rp, up + (n - i - 1), i + 1, vp[i]);
  /* in total, we neglect less than n*B^n, i.e., n ulps of rp[n]. */
}
Example #20
/* basic divexact; (xp, n) = (qp, n)*d - ret*B^n and 0 <= ret < d */
mp_limb_t divexact_basic(mp_ptr qp, mp_ptr xp, mp_size_t n, mp_limb_t d)
{
   mp_size_t j;
   mp_limb_t c, h, q, dummy, h1, t, m;

   ASSERT(n > 0); ASSERT(d != 0);
   ASSERT_MPN(xp, n); ASSERT(MPN_SAME_OR_SEPARATE_P(qp, xp, n));
   ASSERT(d % 2 == 1);

   modlimb_invert(m, d);
   c = 0; h = 0; t = 0;

   for (j = 0; j <= n - 1; j++)
   {
      h1 = xp[j];
      t = h + c; /* set borrow to c; sbb t, h1; set c to borrow */
      if (t > h1) { h1 = h1 - t; c = 1; } else { h1 = h1 - t; c = 0; }
      q = h1*m;
      qp[j] = q;
      umul_ppmm(h, dummy, q, d);
      ASSERT(dummy == h1);
   }

   /* return next quotient*(-d) */
   return h + c;
}
Example #21
mp_limb_t divexact3_direct(mp_ptr qp, mp_ptr xp, mp_size_t n)
{
   mp_size_t j;
   mp_limb_t c, m, acc, ax, dx;

   ASSERT(n > 0); ASSERT_MPN(xp, n);
   ASSERT(MPN_SAME_OR_SEPARATE_P(qp, xp, n));

   m = 0; m = ~m; m = m/3; /* m = (B - 1)/3 */
   c = 0; acc = 0;

   for (j = 0; j <= n - 1; j++)
   {
      umul_ppmm(dx, ax, xp[j], m);
      SBB(c, acc, acc, ax, c);
      qp[j] = acc;
      SBB(c, acc, acc, dx, c);
   }
   SBB(c, acc, acc, 0, c);

   /* return next quotient*(-3), so (xp, n) = (qp, n)*3 - ret*B^n and 0 <= ret < 3 */
   return acc*-3;
}
Example #22
mp_limb_t divexact3_byluck(mp_ptr qp, mp_ptr xp, mp_size_t n)
{
   mp_size_t j;
   mp_limb_t c, m, acc, ax, dx;

   ASSERT(n > 0); ASSERT_MPN(xp, n);
   ASSERT(MPN_SAME_OR_SEPARATE_P(qp, xp, n));

   m = 0; m = ~m; m = m/3; /* m = (B - 1)/3 */
   c = 0; acc = 0;

   for (j = 0; j <= n - 1; j++)
   {
      umul_ppmm(dx, ax, xp[j], m); /* line 1 */
      SUB(c, acc, acc, ax);        /* line 2 */
      qp[j] = acc;                 /* line 3 */
      SBB(c, acc, acc, dx, c);     /* line 4 */
      if (c != 0) { printf("c not zero\n"); abort(); }
   }

   /* return next quotient*(-3), so (xp, n) = (qp, n)*3 - ret*B^n and 0 <= ret < 3 */
   return acc*-3;
}
Example #23
/* Define our own squaring function, which uses mpn_sqr_basecase for its
   allowed sizes, but its own code for larger sizes.  */
static void
mpn_local_sqr (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_ptr tp)
{
  mp_size_t i;

  ASSERT (n >= 1);
  ASSERT (! MPN_OVERLAP_P (rp, 2*n, up, n));

  if (BELOW_THRESHOLD (n, SQR_BASECASE_LIM))
    {
      mpn_sqr_basecase (rp, up, n);
      return;
    }

  {
    mp_limb_t ul, lpl;
    ul = up[0];
    umul_ppmm (rp[1], lpl, ul, ul << GMP_NAIL_BITS);
    rp[0] = lpl >> GMP_NAIL_BITS;
  }
  if (n > 1)
    {
      mp_limb_t cy;

      cy = mpn_mul_1 (tp, up + 1, n - 1, up[0]);
      tp[n - 1] = cy;
      for (i = 2; i < n; i++)
	{
	  mp_limb_t cy;
	  cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
	  tp[n + i - 2] = cy;
	}
      MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);

      {
	mp_limb_t cy;
#if HAVE_NATIVE_mpn_addlsh1_n
	cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
#else
	cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
	cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
#endif
	rp[2 * n - 1] += cy;
      }
    }
}
Example #24
void _nmod_vec_scalar_mul_nmod(mp_ptr res, mp_srcptr vec,
                               slong len, mp_limb_t c, nmod_t mod)
{
   if (mod.norm >= FLINT_BITS/2) /* products will fit in a limb */
   {
      mpn_mul_1(res, vec, len, c);
      _nmod_vec_reduce(res, res, len, mod);
   }
   else /* products may take two limbs */
   {
      slong i;
      for (i = 0; i < len; i++)
      {
         mp_limb_t hi, lo;
         umul_ppmm(hi, lo, vec[i], c);
         NMOD_RED2(res[i], hi, lo, mod); /* hi already reduced mod n */
      }
   }
}
Example #25
mp_limb_t
mpn_bdiv_dbm1c (mp_ptr qp, mp_srcptr ap, mp_size_t n, mp_limb_t bd, mp_limb_t h)
{
  mp_limb_t a, p0, p1, cy;
  mp_size_t i;

  for (i = 0; i < n; i++)
    {
      a = ap[i];
      umul_ppmm (p1, p0, a, bd << GMP_NAIL_BITS);
      p0 >>= GMP_NAIL_BITS;
      cy = h < p0;
      h = (h - p0) & GMP_NUMB_MASK;
      qp[i] = h;
      h = h - p1 - cy;
    }

  return h;
}
Example #26
int main(void)
{
   int i, result;
   FLINT_TEST_INIT(state);
   
   flint_printf("mulmod_precomp....");
   fflush(stdout);

   

   for (i = 0; i < 100000 * flint_test_multiplier(); i++)
   {
      mp_limb_t a, b, d, r1, r2, p1, p2, dinv;
      double dpre;

      mp_limb_t bits = n_randint(state, FLINT_D_BITS) + 1;
      d = n_randtest_bits(state, bits);
      a = n_randtest(state) % d;
      b = n_randtest(state) % d;
      
      dpre = n_precompute_inverse(d);

      r1 = n_mulmod_precomp(a, b, d, dpre);

      umul_ppmm(p1, p2, a, b);
      dinv = n_preinvert_limb(d);
      r2 = n_ll_mod_preinv(p1, p2, d, dinv);

      result = (r1 == r2);
      if (!result)
      {
         flint_printf("FAIL:\n");
         flint_printf("a = %wu, b = %wu, d = %wu, dinv = %f\n", a, b, d, dpre); 
         flint_printf("r1 = %wu, r2 = %wu\n", r1, r2);
         abort();
      }
   }

   FLINT_TEST_CLEANUP(state);
   
   flint_printf("PASS\n");
   return 0;
}
Example #27
mp_limb_t n_clog(mp_limb_t n, mp_limb_t b)
{
    mp_limb_t r, p, t, phi;

    r = 0;
    p = 1;

    while (1)
    {
        umul_ppmm(phi, t, p, b);

        if (t <= n && !phi)
        {
            r++;
            p = t;
        }
        else
            return r + (p != n);
    }
}
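The loop keeps the exact power p = b^r as long as it fits in a limb and does not exceed n (the umul_ppmm high limb phi detects overflow); the final `r + (p != n)` turns floor into ceiling. A worked check (not from the source), assuming FLINT's ulong_extras.h:

#include <stdio.h>
#include <flint/ulong_extras.h>

int main(void)
{
    /* p walks 1, 10, 100: ceil(log_10(100)) = 2, ceil(log_10(101)) = 3 */
    printf("%lu %lu\n", n_clog(100, 10), n_clog(101, 10));
    return 0;
}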
Example #28
/* (xp, n) = (qp, n)*d - ret*B^n and 0 <= ret < d */
mp_limb_t mpn_divrem_hensel_qr_1_1(mp_ptr qp, mp_srcptr xp, mp_size_t n, mp_limb_t d)
{
   mp_size_t j;
   mp_limb_t c, h, q, dummy, h1, t, m;

   ASSERT(n > 0);
   ASSERT_MPN(xp, n);
   ASSERT(MPN_SAME_OR_SEPARATE_P(qp, xp, n));
   ASSERT(d%2 == 1);
   
   modlimb_invert(m, d);

   c = 0;
   h = 0;
   t = 0;

   for (j = 0; j <= n - 1; j++)
   {
      h1 = xp[j];
      t = h + c;
      
      if (t > h1)
      {
         h1 = h1 - t;
         c = 1;
      }
      else
      {
         h1 = h1 - t;
         c = 0;
      }
    
      q = h1*m;
      qp[j] = q;
      umul_ppmm(h, dummy, q, d);
    
      ASSERT(dummy == h1);
   }

   return h + c;
}   
Example #29
void
fmpz_mul_si(fmpz_t f, const fmpz_t g, long x)
{
    fmpz c2 = *g;

    if (x == 0)
    {
        fmpz_zero(f);
        return;
    }
    else if (!COEFF_IS_MPZ(c2)) /* c2 is small */
    {
        mp_limb_t prod[2];
        mp_limb_t uc2 = FLINT_ABS(c2);
        mp_limb_t ux = FLINT_ABS(x);

        /* unsigned limb by limb multiply (assembly for most CPU's) */
        umul_ppmm(prod[1], prod[0], uc2, ux);
        if (!prod[1])           /* result fits in one limb */
        {
            fmpz_set_ui(f, prod[0]);
            if ((c2 ^ x) < 0L)
                fmpz_neg(f, f);
        }
        else                    /* result takes two limbs */
        {
            __mpz_struct *mpz_ptr = _fmpz_promote(f);
            /* two limbs, least significant first, native endian, no nails, stored in prod */
            mpz_import(mpz_ptr, 2, -1, sizeof(mp_limb_t), 0, 0, prod);
            if ((c2 ^ x) < 0L)
                mpz_neg(mpz_ptr, mpz_ptr);
        }
    }
    else                        /* c2 is large */
    {
        __mpz_struct *mpz_ptr = _fmpz_promote(f);   /* ok without val as if aliased both are large */
        mpz_mul_si(mpz_ptr, COEFF_TO_PTR(c2), x);
    }
}
Example #30
/* in each round we remove one limb from the body, i.e. k = 1 */
void mpn_mod_1_2(mp_ptr rem, mp_srcptr xp, mp_size_t xn, mp_srcptr db)
{
   mp_limb_t h, l, sh, sl, th, tl;
   mp_size_t j;
 
   ASSERT(xn >= 4);
   ASSERT_MPN(xp, xn);
   ASSERT_LIMB(db[0]);
   ASSERT_LIMB(db[1]);
   ASSERT_LIMB(db[2]);

   tl = xp[xn - 2];
   th = xp[xn - 1];

   for (j = xn - 4; j >= 0; j -= 2)
   {
      umul_ppmm(sh, sl, xp[j + 1], db[0]);
      add_ssaaaa(sh, sl, sh, sl, 0, xp[j]);
      umul_ppmm(h, l, tl, db[1]);
      add_ssaaaa(sh, sl, sh, sl, h, l);
      umul_ppmm(th, tl, th, db[2]);
      add_ssaaaa(th, tl, th, tl, sh, sl);
   }

   if (j > -2) /* we have at least three limbs to do, i.e. xp[0], ..., tl, th */
   {
      umul_ppmm(sh, sl, tl, db[0]);
      add_ssaaaa(sh, sl, sh, sl, 0, xp[0]);
      umul_ppmm(th, tl, th, db[1]);
      add_ssaaaa(th, tl, th, tl, sh, sl);
   }

   umul_ppmm(h, l, th, db[0]);
   add_ssaaaa(h, l, h, l, 0, tl);

   rem[0] = l;
   rem[1] = h;
}