Exemple #1
0
/*
   Top-level driver: picks FFT parameters (depth, w) for multiplying the
   n1-limb operand i1 by the n2-limb operand i2, then hands the work to
   mul_truncate_sqrt2 (depth < 11) or mul_mfa_truncate_sqrt2 (otherwise).
   r1 is passed straight through to the callee as the output pointer.

   Throughout, n = 2^depth and each operand is cut into pieces of
   coeff_bits = (n*w - (depth+1))/2 bits; pieces1/pieces2 are the resulting
   piece counts. n1 and n2 must be positive (asserted).
*/
void mpn_mul_fft_main(mp_limb_t * r1, mp_limb_t * i1, mp_size_t n1, 
                        mp_limb_t * i2, mp_size_t n2)
{
   mp_size_t shift, step = 1;
   mp_size_t depth = 6;
   mp_size_t w = 1;
   mp_size_t n = ((mp_size_t) 1 << depth);
   mp_bitcnt_t coeff_bits = (n*w - (depth+1))/2;

   const mp_bitcnt_t bits1 = n1*FLINT_BITS;
   const mp_bitcnt_t bits2 = n2*FLINT_BITS;

   mp_size_t pieces1 = (bits1 - 1)/coeff_bits + 1;
   mp_size_t pieces2 = (bits2 - 1)/coeff_bits + 1;

   FLINT_ASSERT(n1 > 0);
   FLINT_ASSERT(n2 > 0);
   FLINT_ASSERT(pieces1 + pieces2 - 1 > 2*n);

   /* Initial search: alternate w = 1, 2 at each depth until the
      pieces1 + pieces2 - 1 output pieces fit within 4*n. */
   for (;;)
   {
      if (pieces1 + pieces2 - 1 <= 4*n)
         break;

      if (w != 1)
      {
         depth++;
         n *= 2;
         w = 1;
      }
      else
         w = 2;

      coeff_bits = (n*w - (depth+1))/2;
      pieces1 = (bits1 - 1)/coeff_bits + 1;
      pieces2 = (bits2 - 1)/coeff_bits + 1;
   }

   /* Large transforms go to the mfa variant with the parameters as found. */
   if (depth >= 11)
   {
      mul_mfa_truncate_sqrt2(r1, i1, n1, i2, n2, depth, w);
      return;
   }

   /* Tuning adjustment: lower depth by `shift` and scale w by 4^shift,
      which leaves n*w multiplied by 2^shift. */
   shift = fft_tuning_table[depth - 6][w - 1];
   depth -= shift;
   n = ((mp_size_t) 1 << depth);
   w *= ((mp_size_t) 1 << (2*shift));

   /* Below depth 6, w is only varied in multiples of 2^(6 - depth). */
   if (depth < 6)
      step = ((mp_size_t) 1 << (6 - depth));

   if (w > step)
   {
      /* Walk w downwards one step at a time, stopping as soon as the pieces
         no longer fit in 4*n or w cannot be lowered further. */
      for (;;)
      {
         w -= step;
         coeff_bits = (n*w - (depth+1))/2;
         pieces1 = (bits1 - 1)/coeff_bits + 1;
         pieces2 = (bits2 - 1)/coeff_bits + 1;

         if (pieces1 + pieces2 - 1 > 4*n || w <= step)
            break;
      }

      /* Step back up once from wherever the walk stopped. */
      w += step;
   }

   mul_truncate_sqrt2(r1, i1, n1, i2, n2, depth, w);
}
Exemple #2
0
/*
   Fills res[0], ..., res[(p-1)/2 - 1] with residues mod p obtained from the
   product of two polynomials F and G whose coefficients are built from
   powers of a primitive root g of p. Going by the function name these are
   presumably Bernoulli numbers mod p -- confirm the exact indexing against
   the accompanying notes.

   res  output array; must have room for at least (p-1)/2 entries
        (res[0] is always set to 1).
   p    modulus; asserted to be a prime greater than 2.

   Returns FALSE (before writing anything to res) if z_primitive_root(p)
   yields 0, and TRUE otherwise.

   NOTE(review): the product g * g_power below is formed in a plain
   unsigned long, so p is presumably expected to be small enough that this
   cannot overflow -- confirm the supported range of p with callers.
*/
int bernoulli_mod_p_mpz(unsigned long *res, unsigned long p)
{
   FLINT_ASSERT(p > 2);
   FLINT_ASSERT(z_isprime(p) == 1);
   
   unsigned long g, g_inv, g_sqr;
   double p_inv = z_precompute_inverse(p);
   g = z_primitive_root(p);
   
   if(!g)
   {
      return FALSE;
   }
   
   g_inv = z_invert(g, p);
   g_sqr = z_mulmod_precomp(g, g, p, p_inv);
   
   unsigned long poly_size = (p-1)/2;
   
   // selects the sign used when combining the two halves of the product
   int is_odd = poly_size % 2;
   
   // g_power runs through g^{i-1}, g_power_inv through g^{-i}
   unsigned long g_power, g_power_inv;
   g_power = g_inv;
   g_power_inv = 1;
   
   // constant is (g-1)/2 mod p
   // (when g is even, g-1 is odd, so add p to make the numerator even)
   unsigned long constant;
   if(g % 2)
   {
      constant = (g-1)/2;
   }
   else
   {
      constant = (g+p-1)/2;
   }
   
   // fudge holds g^{i^2}, fudge_inv holds g^{-i^2}
   unsigned long fudge, fudge_inv;
   fudge = fudge_inv = 1;
   
   // compute the polynomials F(X) and G(X)
   mpz_poly_t F, G;
   
   mpz_poly_init2(F, poly_size);
   mpz_poly_init2(G, poly_size);
   
   unsigned long i, temp, h;
   
   for(i = 0; i < poly_size; i++)
   {  
      // compute h(g^i)/g^i (h(x) is as in latex notes)
      temp = g * g_power;
            
      // temp / p is the quotient of g * g^{i-1} by p; adding p keeps the
      // first argument non-negative
      h = z_mulmod_precomp(p + constant - (temp / p), g_power_inv, p, p_inv);
      
      g_power = z_mod_precomp(temp, p, p_inv);
      g_power_inv = z_mulmod_precomp(g_power_inv, g_inv, p, p_inv);
      
      // store coefficient g^{i^2} h(g^i)/g^i
      mpz_poly_set_coeff_ui(G, i, z_mulmod_precomp(h, fudge, p, p_inv));
      mpz_poly_set_coeff_ui(F, i, fudge_inv);
      
      // update fudge and fudge_inv
      fudge = z_mulmod_precomp(z_mulmod_precomp(fudge, g_power, p, p_inv), z_mulmod_precomp(g_power, g, p, p_inv), p, p_inv);
      fudge_inv = z_mulmod_precomp(z_mulmod_precomp(fudge_inv, g_power_inv, p, p_inv), z_mulmod_precomp(g_power_inv, g, p, p_inv), p, p_inv);
   }
   
   // the constant term of F is forced to zero (overwrites the 1 stored above)
   mpz_poly_set_coeff_ui(F, 0, 0);
   
   // step 2: multiply the polynomials...
   mpz_poly_t product;
   mpz_poly_init(product);
   mpz_poly_mul(product, G, F);
   
   // step 3: assemble the result...   
   unsigned long g_sqr_power, value;
   g_sqr_power = g_sqr;
   fudge = g;

   res[0] = 1;
   
   mpz_t value_coeff;
   mpz_init(value_coeff);

   for(i = 1; i < poly_size; i++)
   {
      // Upper-half coefficient reduced mod p from the full multi-precision
      // value. Product coefficients are sums of up to poly_size products of
      // residues, so they need not fit in a single unsigned long; reducing
      // a word-truncated copy instead would give a wrong residue.
      mpz_poly_get_coeff(value_coeff, product, i + poly_size);
      value = mpz_fdiv_ui(value_coeff, p);
      
      // combine with the lower-half coefficient and G's own coefficient;
      // the sign of the upper-half term depends on the parity of poly_size
      mpz_poly_get_coeff(value_coeff, product, i);
      if(is_odd)
      {
         value = z_mod_precomp(mpz_poly_get_coeff_ui(G, i) + mpz_fdiv_ui(value_coeff, p) + p - value, p, p_inv);
      }
      else
      {
         value = z_mod_precomp(mpz_poly_get_coeff_ui(G, i) + mpz_fdiv_ui(value_coeff, p) + value, p, p_inv);
      }
      
      // scale by 4 * i * fudge, then divide by (1 - g_sqr_power) mod p
      value = z_mulmod_precomp(z_mulmod_precomp(z_mulmod_precomp(4, i, p, p_inv), fudge, p, p_inv), value, p, p_inv);
      value = z_mulmod_precomp(value, z_invert(p+1-g_sqr_power, p), p, p_inv);

      res[i] = value;
      
      // advance g_sqr_power by g^2 overall, updating fudge with the
      // intermediate (odd) power in between
      g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv);
      fudge = z_mulmod_precomp(fudge, g_sqr_power, p, p_inv);
      g_sqr_power = z_mulmod_precomp(g_sqr_power, g, p, p_inv);
   }
   
   mpz_clear(value_coeff);
   
   mpz_poly_clear(F);
   mpz_poly_clear(G);
   mpz_poly_clear(product);
   
   return TRUE;
}