Beispiel #1
0
void fft_convolution(mp_limb_t ** ii, mp_limb_t ** jj, long depth, 
                              long limbs, long trunc, mp_limb_t ** t1, 
                          mp_limb_t ** t2, mp_limb_t ** s1, mp_limb_t * tt)
{
   long n = (1L<<depth), j;
   long w = (limbs*FLINT_BITS)/n;
   long sqrt = (1L<<(depth/2));
   
   if (depth <= 6)
   {
      trunc = 2*((trunc + 1)/2);
      
      fft_truncate_sqrt2(ii, n, w, t1, t2, s1, trunc);
   
      if (ii != jj)
         fft_truncate_sqrt2(jj, n, w, t1, t2, s1, trunc);

      for (j = 0; j < trunc; j++)
      {
         mpn_normmod_2expp1(ii[j], limbs);
         if (ii != jj) mpn_normmod_2expp1(jj[j], limbs);
         
         fft_mulmod_2expp1(ii[j], ii[j], jj[j], n, w, tt);
      }

      ifft_truncate_sqrt2(ii, n, w, t1, t2, s1, trunc);

      for (j = 0; j < trunc; j++)
      {
         mpn_div_2expmod_2expp1(ii[j], ii[j], limbs, depth + 2);
         mpn_normmod_2expp1(ii[j], limbs);
      }
   } else
   {
      trunc = 2*sqrt*((trunc + 2*sqrt - 1)/(2*sqrt));
      
      fft_mfa_truncate_sqrt2_outer(ii, n, w, t1, t2, s1, sqrt, trunc);
      
      if (ii != jj)
         fft_mfa_truncate_sqrt2_outer(jj, n, w, t1, t2, s1, sqrt, trunc);
      
      fft_mfa_truncate_sqrt2_inner(ii, jj, n, w, t1, t2, s1, sqrt, trunc, tt);
      
      ifft_mfa_truncate_sqrt2_outer(ii, n, w, t1, t2, s1, sqrt, trunc);
   }
}
void mul_mfa_truncate_sqrt2(mp_ptr r1, mp_srcptr i1, mp_size_t n1,
                        mp_srcptr i2, mp_size_t n2, mp_bitcnt_t depth, mp_bitcnt_t w)
{
   mp_size_t n = (UWORD(1)<<depth);
   mp_bitcnt_t bits1 = (n*w - (depth+1))/2; 
   mp_size_t sqrt = (UWORD(1)<<(depth/2));

   mp_size_t r_limbs = n1 + n2;
   mp_size_t limbs = (n*w)/FLINT_BITS;
   mp_size_t size = limbs + 1;

   mp_size_t j1 = (n1*FLINT_BITS - 1)/bits1 + 1;
   mp_size_t j2 = (n2*FLINT_BITS - 1)/bits1 + 1;
   
   mp_size_t i, j, trunc;

   mp_limb_t ** ii, ** jj, * t1, * t2, * s1, * ptr;
   mp_limb_t * tt;
   
   ii = flint_malloc((4*(n + n*size) + 5*size)*sizeof(mp_limb_t));
   for (i = 0, ptr = (mp_limb_t *) ii + 4*n; i < 4*n; i++, ptr += size) 
   {
      ii[i] = ptr;
   }
   t1 = ptr;
   t2 = t1 + size;
   s1 = t2 + size;
   tt = s1 + size;
   
   if (i1 != i2)
   {
      jj = flint_malloc(4*(n + n*size)*sizeof(mp_limb_t));
      for (i = 0, ptr = (mp_limb_t *) jj + 4*n; i < 4*n; i++, ptr += size) 
      {
         jj[i] = ptr;
      }
   } else jj = ii;
   
   trunc = j1 + j2 - 1;
   if (trunc <= 2*n) trunc = 2*n + 1;
   trunc = 2*sqrt*((trunc + 2*sqrt - 1)/(2*sqrt)); /* trunc must be divisible by 2*sqrt */

   j1 = fft_split_bits(ii, i1, n1, bits1, limbs);
   for (j = j1 ; j < 4*n; j++)
      flint_mpn_zero(ii[j], limbs + 1);
   
   fft_mfa_truncate_sqrt2_outer(ii, n, w, &t1, &t2, &s1, sqrt, trunc);
   
   if (i1 != i2)
   {
      j2 = fft_split_bits(jj, i2, n2, bits1, limbs);
      for (j = j2 ; j < 4*n; j++)
         flint_mpn_zero(jj[j], limbs + 1);

      fft_mfa_truncate_sqrt2_outer(jj, n, w, &t1, &t2, &s1, sqrt, trunc);
   } else j2 = j1;
   
   fft_mfa_truncate_sqrt2_inner(ii, jj, n, w, &t1, &t2, &s1, sqrt, trunc, tt);
   ifft_mfa_truncate_sqrt2_outer(ii, n, w, &t1, &t2, &s1, sqrt, trunc);
       
   flint_mpn_zero(r1, r_limbs);
   fft_combine_bits(r1, ii, j1 + j2 - 1, bits1, limbs, r_limbs);
     
   flint_free(ii);
   if (i1 != i2)
      flint_free(jj);
}