Beispiel #1
0
void ifft_butterfly(mp_limb_t * s, mp_limb_t * t, mp_limb_t * i1, 
                    mp_limb_t * i2, mp_size_t i, mp_size_t limbs, mp_bitcnt_t w)
{
   mp_size_t y;
   mp_bitcnt_t b1;
   
   b1 = i*w;
   y  = b1/FLINT_BITS;
   b1 = b1%FLINT_BITS;

   mpn_div_2expmod_2expp1(i2, i2, limbs, b1);
   butterfly_rshB(s, t, i1, i2, limbs, 0, y);
}
Beispiel #2
0
void mpir_ifft_trunc1(mp_ptr * ii, mp_size_t n, mp_bitcnt_t w, 
                    mp_ptr * t1, mp_ptr * t2, mp_size_t trunc)
{
    mp_size_t i;
    mp_size_t limbs = (w*n)/GMP_LIMB_BITS;
    
    if (trunc == 2*n)
        mpir_ifft_radix2(ii, n, w, t1, t2);
    else if (trunc <= n)
    {
        for (i = trunc; i < n; i++)
        {
            mpn_add_n(ii[i], ii[i], ii[i+n], limbs + 1);
            mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, 1);
        }
      
        mpir_ifft_trunc1(ii, n/2, 2*w, t1, t2, trunc);

        for (i = 0; i < trunc; i++)
        {
#if  HAVE_NATIVE_mpn_addsub_n
            mpn_addsub_n(ii[i], ii[i], ii[i], ii[n+i], limbs + 1);
#else
            mpn_add_n(ii[i], ii[i], ii[i], limbs + 1);
            mpn_sub_n(ii[i], ii[i], ii[n+i], limbs + 1);
#endif
        }
    } else
    {
        mpir_ifft_radix2(ii, n/2, 2*w, t1, t2);

        for (i = trunc - n; i < n; i++)
        {
            mpn_sub_n(ii[i+n], ii[i], ii[i+n], limbs + 1);
            mpir_fft_adjust(*t1, ii[i+n], i, limbs, w);
            mpn_add_n(ii[i], ii[i], ii[i+n], limbs + 1);
            MP_PTR_SWAP(ii[i+n], *t1);
        }
   
        mpir_ifft_trunc1(ii+n, n/2, 2*w, t1, t2, trunc - n);

        for (i = 0; i < trunc - n; i++) 
        {   
            mpir_ifft_butterfly(*t1, *t2, ii[i], ii[n+i], i, limbs, w);
   
            MP_PTR_SWAP(ii[i],   *t1);
            MP_PTR_SWAP(ii[n+i], *t2);
        }
    }
}
Beispiel #3
0
void fft_convolution(mp_limb_t ** ii, mp_limb_t ** jj, long depth, 
                              long limbs, long trunc, mp_limb_t ** t1, 
                          mp_limb_t ** t2, mp_limb_t ** s1, mp_limb_t * tt)
{
   long n = (1L<<depth), j;
   long w = (limbs*FLINT_BITS)/n;
   long sqrt = (1L<<(depth/2));
   
   if (depth <= 6)
   {
      trunc = 2*((trunc + 1)/2);
      
      fft_truncate_sqrt2(ii, n, w, t1, t2, s1, trunc);
   
      if (ii != jj)
         fft_truncate_sqrt2(jj, n, w, t1, t2, s1, trunc);

      for (j = 0; j < trunc; j++)
      {
         mpn_normmod_2expp1(ii[j], limbs);
         if (ii != jj) mpn_normmod_2expp1(jj[j], limbs);
         
         fft_mulmod_2expp1(ii[j], ii[j], jj[j], n, w, tt);
      }

      ifft_truncate_sqrt2(ii, n, w, t1, t2, s1, trunc);

      for (j = 0; j < trunc; j++)
      {
         mpn_div_2expmod_2expp1(ii[j], ii[j], limbs, depth + 2);
         mpn_normmod_2expp1(ii[j], limbs);
      }
   } else
   {
      trunc = 2*sqrt*((trunc + 2*sqrt - 1)/(2*sqrt));
      
      fft_mfa_truncate_sqrt2_outer(ii, n, w, t1, t2, s1, sqrt, trunc);
      
      if (ii != jj)
         fft_mfa_truncate_sqrt2_outer(jj, n, w, t1, t2, s1, sqrt, trunc);
      
      fft_mfa_truncate_sqrt2_inner(ii, jj, n, w, t1, t2, s1, sqrt, trunc, tt);
      
      ifft_mfa_truncate_sqrt2_outer(ii, n, w, t1, t2, s1, sqrt, trunc);
   }
}
Beispiel #4
0
int
main(void)
{
    mp_bitcnt_t bits;
    mp_size_t j, k, n, w, limbs, d;
    mp_limb_t * nn, * r;
    mpz_t p, m1, m2, mn1, mn2;

    gmp_randstate_t state;

    tests_start();
    fflush(stdout);

    gmp_randinit_default(state);

    mpz_init(m1);
    mpz_init(m2);
    mpz_init(mn1);
    mpz_init(mn2);
    mpz_init(p);

    /* normalisation mod p = 2^wn + 1 where B divides nw and n is a power of 2 */
    for (bits = GMP_LIMB_BITS; bits < 16*GMP_LIMB_BITS; bits += GMP_LIMB_BITS)
    {
        for (j = 1; j < 32; j++)
        {
            for (k = 1; k <= GMP_LIMB_BITS; k <<= 1)
            {
                for (d = 0; d < GMP_LIMB_BITS; d++)
                {
                    n = bits/k;
                    w = j*k;
                    limbs = (n*w)/GMP_LIMB_BITS;
            
                    nn = malloc((limbs + 1)*sizeof(mp_limb_t));
                    r  = malloc((limbs + 1)*sizeof(mp_limb_t));
                    mpir_random_fermat(nn, state, limbs);
                    mpir_fermat_to_mpz(mn1, nn, limbs);
                    set_p(p, n, w);
            
                    mpn_div_2expmod_2expp1(r, nn, limbs, d);
                    mpir_fermat_to_mpz(m2, r, limbs);
                    mpz_mod(m2, m2, p);
                    
                    mpz_mod(m1, mn1, p);
                    mpz_mul_2exp(m2, m2, d);
                    mpz_mod(m2, m2, p);
                    
                    if (mpz_cmp(m1, m2) != 0)
                    {
                        printf("FAIL:\n");
                        printf("mpn_div_2expmod_2expp1 error\n");
                        gmp_printf("want %Zx\n\n", m1);
                        gmp_printf("got  %Zx\n", m2);
                        abort();
                    }
                }

                free(nn);
                free(r);
            }
        }
    }

    mpz_clear(mn2);
    mpz_clear(mn1);
    mpz_clear(m2);
    mpz_clear(m1);
    mpz_clear(p);

    gmp_randclear(state);
    
    tests_end();
    return 0;
}
int
main(void)
{
    mp_bitcnt_t depth, w;

    flint_rand_t state;

    printf("fft/ifft_mfa_truncate_sqrt2....");
    fflush(stdout);

    flint_randinit(state);
    _flint_rand_init_gmp(state);

    for (depth = 6; depth <= 13; depth++)
    {
        for (w = 1; w <= 5; w++)
        {
            mp_size_t n = (1UL<<depth);
            mp_size_t trunc = 2*n + n_randint(state, 2*n) + 1;
            mp_size_t n1 = (1UL<<(depth/2));
            mp_size_t limbs = (n*w)/GMP_LIMB_BITS;
            mp_size_t size = limbs + 1;
            mp_size_t i;
            mp_limb_t * ptr;
            mp_limb_t ** ii, ** jj, * t1, * t2, * s1;
        
            trunc = 2*n1*((trunc + 2*n1 - 1)/(2*n1));

            ii = flint_malloc((4*(n + n*size) + 3*size)*sizeof(mp_limb_t));
            for (i = 0, ptr = (mp_limb_t *) ii + 4*n; i < 4*n; i++, ptr += size) 
            {
                ii[i] = ptr;
                random_fermat(ii[i], state, limbs);
            }
            t1 = ptr;
            t2 = t1 + size;
            s1 = t2 + size;
   
            for (i = 0; i < 4*n; i++)
               mpn_normmod_2expp1(ii[i], limbs);
    
            jj = flint_malloc(4*(n + n*size)*sizeof(mp_limb_t));
            for (i = 0, ptr = (mp_limb_t *) jj + 4*n; i < 4*n; i++, ptr += size) 
            {
                jj[i] = ptr;
                mpn_copyi(jj[i], ii[i], size);
            }
   
            fft_mfa_truncate_sqrt2(ii, n, w, &t1, &t2, &s1, n1, trunc);
            ifft_mfa_truncate_sqrt2(ii, n, w, &t1, &t2, &s1, n1, trunc);
            for (i = 0; i < trunc; i++)
            {
                mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, depth + 2);
                mpn_normmod_2expp1(ii[i], limbs);
            }

            for (i = 0; i < trunc; i++)
            {
                if (mpn_cmp(ii[i], jj[i], size) != 0)
                {
                    printf("FAIL:\n");
                    printf("n = %ld, trunc = %ld\n", n, trunc);
                    printf("Error in entry %ld\n", i);
                    abort();
                }
            }

            flint_free(ii);
            flint_free(jj);
        }
    }

    flint_randclear(state);
    
    printf("PASS\n");
    return 0;
}
Beispiel #6
0
int
main(void)
{
    mp_bitcnt_t depth, w;
    
    flint_rand_t state;

    printf("fft/ifft_radix2....");
    fflush(stdout);

    flint_randinit(state);
    _flint_rand_init_gmp(state);

    for (depth = 6; depth <= 12; depth++)
    {
        for (w = 1; w <= 5; w++)
        {
            mp_size_t n = (1UL<<depth);
            mp_size_t limbs = (n*w)/GMP_LIMB_BITS;
            mp_size_t size = limbs + 1;
            mp_size_t i;
            mp_limb_t * ptr;
            mp_limb_t ** ii, ** jj, *t1, *t2;
        
            ii = flint_malloc((2*(n + n*size) + 2*size)*sizeof(mp_limb_t));
            for (i = 0, ptr = (mp_limb_t *) ii + 2*n; i < 2*n; i++, ptr += size) 
            {
                ii[i] = ptr;
                random_fermat(ii[i], state, limbs);
            }
            t1 = ptr;
            t2 = t1 + size;
   
            for (i = 0; i < 2*n; i++)
               mpn_normmod_2expp1(ii[i], limbs);
    
            jj = flint_malloc(2*(n + n*size)*sizeof(mp_limb_t));
            for (i = 0, ptr = (mp_limb_t *) jj + 2*n; i < 2*n; i++, ptr += size) 
            {
                jj[i] = ptr;
                mpn_copyi(jj[i], ii[i], size);
            }
   
            fft_radix2(ii, n, w, &t1, &t2);
            ifft_radix2(ii, n, w, &t1, &t2);
            for (i = 0; i < 2*n; i++)
            {
                mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, depth + 1);
                mpn_normmod_2expp1(ii[i], limbs);
            }

            for (i = 0; i < 2*n; i++)
            {
                if (mpn_cmp(ii[i], jj[i], size) != 0)
                {
                    printf("FAIL:\n");
                    printf("Error in entry %ld\n", i);
                    abort();
                }
            }

            flint_free(ii);
            flint_free(jj);
        }
    }

    flint_randclear(state);
    
    printf("PASS\n");
    return 0;
}
Beispiel #7
0
void mpir_fft_mulmod_2expp1(mp_ptr r1, mp_srcptr i1, mp_srcptr i2, 
                 mp_size_t r_limbs, mp_bitcnt_t depth, mp_bitcnt_t w)
{
   mp_size_t n = (((mp_size_t)1)<<depth);
   mp_bitcnt_t bits1 = (r_limbs*GMP_LIMB_BITS)/(2*n);
   
   mp_size_t limb_add, limbs = (n*w)/GMP_LIMB_BITS;
   mp_size_t size = limbs + 1;
   mp_size_t i, j, ll;

   mp_limb_t * ptr;
   mp_limb_t ** ii, ** jj, *tt, *t1, *t2, *s1, *r, *ii0, *jj0;
   mp_limb_t c;
   TMP_DECL;

   TMP_MARK;
   ii = TMP_BALLOC_MP_PTRS(2*(n + n*size) + 4*n + 5*size);
   for (i = 0, ptr = (mp_ptr) ii + 2*n; i < 2*n; i++, ptr += size) 
   {
      ii[i] = ptr;
   }
   ii0 = ptr;
   t1 = ii0 + 2*n;
   t2 = t1 + size;
   s1 = t2 + size;
   r = s1 + size;
   tt = r + 2*n;
   
   if (i1 != i2)
   {
      jj = TMP_BALLOC_MP_PTRS(2*(n + n*size) + 2*n);
      for (i = 0, ptr = (mp_ptr) jj + 2*n; i < 2*n; i++, ptr += size) 
      {
         jj[i] = ptr;
      }
      jj0 = ptr;
   } else
   {
      jj = ii;
      jj0 = ii0;
   }

   j = mpir_fft_split_bits(ii, i1, r_limbs, bits1, limbs);
   for ( ; j < 2*n; j++)
      mpn_zero(ii[j], limbs + 1);

   for (i = 0; i < 2*n; i++)
      ii0[i] = ii[i][0];
 
   mpir_fft_negacyclic(ii, n, w, &t1, &t2, &s1);
   for (j = 0; j < 2*n; j++)
      mpn_normmod_2expp1(ii[j], limbs);

   if (i1 != i2)
   {
      j = mpir_fft_split_bits(jj, i2, r_limbs, bits1, limbs);
      for ( ; j < 2*n; j++)
          mpn_zero(jj[j], limbs + 1);

      for (i = 0; i < 2*n; i++)
         jj0[i] = jj[i][0];

      mpir_fft_negacyclic(jj, n, w, &t1, &t2, &s1);
   }
      
   for (j = 0; j < 2*n; j++)
   {
      if (i1 != i2) mpn_normmod_2expp1(jj[j], limbs);
      c = 2*ii[j][limbs] + jj[j][limbs];

      ii[j][limbs] = mpn_mulmod_2expp1_basecase(ii[j], ii[j], jj[j], c, n*w, tt);
   }
   
   mpir_ifft_negacyclic(ii, n, w, &t1, &t2, &s1);
   
   mpir_fft_naive_convolution_1(r, ii0, jj0, 2*n);

   for (j = 0; j < 2*n; j++)
   {
      mp_limb_t t, cy2;
      
      mpn_div_2expmod_2expp1(ii[j], ii[j], limbs, depth + 1);
      mpn_normmod_2expp1(ii[j], limbs);

      t = ii[j][limbs];
      ii[j][limbs] = r[j] - ii[j][0];
      cy2 = mpn_add_1(ii[j], ii[j], limbs + 1, ii[j][limbs]);
      add_ssaaaa(r[j], ii[j][limbs], 0, ii[j][limbs], 0, t);
      if (cy2) r[j]++;
   }
   
   mpn_zero(r1, r_limbs + 1);
   mpir_fft_combine_bits(r1, ii, 2*n - 1, bits1, limbs + 1, r_limbs + 1);
   
   /* 
      as the negacyclic convolution has effectively done subtractions
      some of the coefficients will be negative, so need to subtract p
   */
   ll = 0;
   limb_add = bits1/GMP_LIMB_BITS;
   
   for (j = 0; j < 2*n - 2; j++)
   {   
      if (r[j]) 
         mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);
      else if ((mp_limb_signed_t) ii[j][limbs] < 0) /* coefficient was -ve */
      {
         mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);
         mpn_sub_1(r1 + ll + limbs + 1, r1 + ll + limbs + 1, r_limbs - limbs - ll, 1);
      }

      ll += limb_add;
   }
   /* penultimate coefficient, top bit was already ignored */
   if (r[j] || (mp_limb_signed_t) ii[j][limbs] < 0) /* coefficient was -ve */
      mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);
   
   /* final coefficient wraps around */
   if (limb_add)
      r1[r_limbs] += mpn_add_n(r1 + r_limbs - limb_add, r1 + r_limbs - limb_add, ii[2*n - 1], limb_add);
   c = mpn_sub_n(r1, r1, ii[2*n - 1] + limb_add, limbs + 1 - limb_add);
   mpn_addmod_2expp1_1(r1 + limbs + 1 - limb_add, r_limbs - limbs - 1 + limb_add, -c);
   mpn_normmod_2expp1(r1, r_limbs);
   
   TMP_FREE;
}
int
main(void)
{
    mp_bitcnt_t depth, w;

    gmp_randstate_t state;

    tests_start();
    fflush(stdout);

    gmp_randinit_default(state);

    for (depth = 6; depth <= 12; depth++)
    {
        for (w = 1; w <= 5; w++)
        {
            mp_size_t n = (((mp_limb_t)1)<<depth);
            mp_size_t limbs = (n*w)/GMP_LIMB_BITS;
            mp_size_t size = limbs + 1;
            mp_size_t i;
            mp_limb_t * ptr;
            mp_limb_t ** ii, ** jj, * t1, * t2, * s1;

            ii = malloc((2*(n + n*size) + 3*size)*sizeof(mp_limb_t));
            for (i = 0, ptr = (mp_limb_t *) ii + 2*n; i < 2*n; i++, ptr += size)
            {
                ii[i] = ptr;
                mpir_random_fermat(ii[i], state, limbs);
            }
            t1 = ptr;
            t2 = t1 + size;
            s1 = t2 + size;

            for (i = 0; i < 2*n; i++)
                mpn_normmod_2expp1(ii[i], limbs);

            jj = malloc(2*(n + n*size)*sizeof(mp_limb_t));
            for (i = 0, ptr = (mp_limb_t *) jj + 2*n; i < 2*n; i++, ptr += size)
            {
                jj[i] = ptr;
                mpn_copyi(jj[i], ii[i], size);
            }

            mpir_fft_negacyclic(ii, n, w, &t1, &t2, &s1);
            mpir_ifft_negacyclic(ii, n, w, &t1, &t2, &s1);
            for (i = 0; i < 2*n; i++)
            {
                mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, depth + 1);
                mpn_normmod_2expp1(ii[i], limbs);
            }

            for (i = 0; i < 2*n; i++)
            {
                if (mpn_cmp(ii[i], jj[i], size) != 0)
                {
                    printf("FAIL:\n");
                    printf("Error in entry %ld\n", i);
                    abort();
                }
            }

            free(ii);
            free(jj);
        }
    }

    gmp_randclear(state);

    tests_end();
    return 0;
}