/*
   Process one row of n1 coefficients for fft_mfa_truncate_sqrt2_inner.

   The row starts at ii + row*n1 (and jj + row*n1).  It is transformed with
   fft_radix2 (length parameter n1/2, root parameter w*n2), every entry is
   normalised and multiplied pointwise by the matching entry of jj with
   fft_mulmod_2expp1, and the ii row is finally transformed back with
   ifft_radix2.  When ii == jj (squaring) the second operand is not
   transformed or normalised a second time.
*/
static void
fft_mfa_truncate_sqrt2_inner_row(mp_limb_t ** ii, mp_limb_t ** jj,
                   mp_size_t n, mp_bitcnt_t w, mp_limb_t ** t1,
                   mp_limb_t ** t2, mp_size_t n1, mp_size_t n2,
                   mp_size_t limbs, mp_size_t row, mp_limb_t * tt)
{
   mp_size_t j;
   mp_limb_t ** irow = ii + row*n1;
   mp_limb_t ** jrow = jj + row*n1;

   fft_radix2(irow, n1/2, w*n2, t1, t2);
   if (ii != jj) fft_radix2(jrow, n1/2, w*n2, t1, t2);

   for (j = 0; j < n1; j++)
   {
      mpn_normmod_2expp1(irow[j], limbs);
      if (ii != jj) mpn_normmod_2expp1(jrow[j], limbs);
      fft_mulmod_2expp1(irow[j], irow[j], jrow[j], n, w, tt);
   }

   ifft_radix2(irow, n1/2, w*n2, t1, t2);
}

/*
   Inner (row) stage of the truncated matrix Fourier convolution.

   ii, jj   arrays of coefficient pointers; the product is left in ii.
            Passing jj == ii performs a squaring.
   n, w     transform parameters; each coefficient has
            limbs = n*w/FLINT_BITS limbs (plus the extra top limb used by
            the *_2expp1 routines).
   t1, t2   scratch passed through to fft_radix2 / ifft_radix2.
   temp     unused by this function; kept so the signature is unchanged.
   n1       row length; the first 2n entries form n2 = 2n/n1 rows.
   trunc    truncation length; (trunc - 2n)/n1 rows of the entries above
            index 2n are processed.
   tt       scratch passed to fft_mulmod_2expp1.

   The rows above index 2n are visited first, in bit-reversed row order
   (n_revbin(s, depth) with 2^depth >= n2); then all n2 rows of the first
   2n entries are visited in natural order.
*/
void fft_mfa_truncate_sqrt2_inner(mp_limb_t ** ii, mp_limb_t ** jj, mp_size_t n, 
                   mp_bitcnt_t w, mp_limb_t ** t1, mp_limb_t ** t2, 
                  mp_limb_t ** temp, mp_size_t n1, mp_size_t trunc, mp_limb_t * tt)
{
   mp_size_t i, s;
   mp_size_t n2 = (2*n)/n1;
   mp_size_t trunc2 = (trunc - 2*n)/n1;
   mp_size_t limbs = (n*w)/FLINT_BITS;
   mp_bitcnt_t depth = 0;

   (void) temp; /* not needed here */

   /* smallest depth with 2^depth >= n2, used for the bit reversal below */
   while ((UWORD(1)<<depth) < n2) depth++;

   /* convolutions on relevant rows (entries above index 2n) */
   for (s = 0; s < trunc2; s++)
   {
      fft_mfa_truncate_sqrt2_inner_row(ii + 2*n, jj + 2*n, n, w, t1, t2,
                                 n1, n2, limbs, n_revbin(s, depth), tt);
   }

   /* convolutions on rows (first 2n entries) */
   for (i = 0; i < n2; i++)
   {
      fft_mfa_truncate_sqrt2_inner_row(ii, jj, n, w, t1, t2,
                                       n1, n2, limbs, i, tt);
   }
}
示例#2
0
/*
   Convolve the coefficient arrays ii and jj in place (result in ii), using
   transforms with n = 2^depth and w = limbs*FLINT_BITS/n.  Passing
   jj == ii squares ii and skips the second forward transform.

   trunc is rounded up before use: to an even value when depth <= 6, and to
   a multiple of 2*2^(depth/2) otherwise.

   depth <= 6 : truncated sqrt2 FFT, pointwise products, inverse FFT, then
                each output is divided by 2^(depth + 2) and normalised.
   depth  > 6 : matrix Fourier variant (outer / inner / inverse outer).
                No explicit scaling is done here.
                NOTE(review): presumably the scaling happens inside
                ifft_mfa_truncate_sqrt2_outer - confirm.

   t1, t2, s1 and tt are scratch buffers handed to the callees.
*/
void fft_convolution(mp_limb_t ** ii, mp_limb_t ** jj, long depth, 
                              long limbs, long trunc, mp_limb_t ** t1, 
                          mp_limb_t ** t2, mp_limb_t ** s1, mp_limb_t * tt)
{
   const long n = (1L<<depth);
   const long w = (limbs*FLINT_BITS)/n;
   const int same = (ii == jj);
   long k;

   if (depth > 6)
   {
      /* matrix Fourier algorithm with rows of length n1 = 2^(depth/2) */
      const long n1 = (1L<<(depth/2));
      const long step = 2*n1;

      trunc = step*((trunc + step - 1)/step);

      fft_mfa_truncate_sqrt2_outer(ii, n, w, t1, t2, s1, n1, trunc);
      if (!same)
         fft_mfa_truncate_sqrt2_outer(jj, n, w, t1, t2, s1, n1, trunc);

      fft_mfa_truncate_sqrt2_inner(ii, jj, n, w, t1, t2, s1, n1, trunc, tt);

      ifft_mfa_truncate_sqrt2_outer(ii, n, w, t1, t2, s1, n1, trunc);
      return;
   }

   /* small case: plain truncated transform, trunc rounded up to even */
   trunc = 2*((trunc + 1)/2);

   fft_truncate_sqrt2(ii, n, w, t1, t2, s1, trunc);
   if (!same)
      fft_truncate_sqrt2(jj, n, w, t1, t2, s1, trunc);

   /* pointwise products of the transformed coefficients */
   for (k = 0; k < trunc; k++)
   {
      mpn_normmod_2expp1(ii[k], limbs);
      if (!same)
         mpn_normmod_2expp1(jj[k], limbs);

      fft_mulmod_2expp1(ii[k], ii[k], jj[k], n, w, tt);
   }

   ifft_truncate_sqrt2(ii, n, w, t1, t2, s1, trunc);

   /* remove the factor 2^(depth + 2) left by the transform pair */
   for (k = 0; k < trunc; k++)
   {
      mpn_div_2expmod_2expp1(ii[k], ii[k], limbs, depth + 2);
      mpn_normmod_2expp1(ii[k], limbs);
   }
}
示例#3
0
/*
   Multiply i1 by i2 into r, where each operand occupies limbs + 1 limbs
   and the top limb acts as a flag.  tt is scratch for the basecase routine.

   - If either operand has its top limb set (low bit of c for i2, next bit
     for i1), the result is the negation of the other operand, normalised
     with mpn_normmod_2expp1, and 0 is returned.
   - If limbs <= FFT_MULMOD_2EXPP1_CUTOFF the basecase routine is used
     (or the top limb is simply cleared when limbs == 0).
   - Otherwise an FFT depth is derived from the bit size and the tuning
     table mulmod_2expp1_table_n, and mpir_fft_mulmod_2expp1 does the work.

   Returns r[limbs] in the last two cases.
*/
int mpn_mulmod_Bexpp1(mp_ptr r, mp_srcptr i1, mp_srcptr i2, mp_size_t limbs, mp_ptr tt)
{
   const mp_size_t bits = limbs * GMP_LIMB_BITS;
   const mp_limb_t c = 2 * i1[limbs] + i2[limbs];
   mp_bitcnt_t depth, depth1;
   mp_size_t off, w1, idx;

   if (c & 3)
   {
      /* bit 0 set: negate i1; otherwise bit 1 is set: negate i2 */
      mpn_neg_n(r, (c & 1) ? i1 : i2, limbs + 1);
      mpn_normmod_2expp1(r, limbs);
      return 0;
   }

   if (limbs <= FFT_MULMOD_2EXPP1_CUTOFF)
   {
      if (bits)
         r[limbs] = mpn_mulmod_2expp1_basecase(r, i1, i2, c, bits, tt);
      else
         r[limbs] = 0;

      return r[limbs];
   }

   /* smallest depth >= 1 with 2^depth >= bits */
   for (depth = 1; (((mp_limb_t)1)<<depth) < bits; depth++)
      ;

   /* tuning table is indexed from depth 12, clamped at both ends */
   idx = (depth < 12) ? 0 : MIN(depth, FFT_N_NUM + 11) - 12;
   off = mulmod_2expp1_table_n[idx];

   depth1 = depth/2 - off;
   w1 = bits/(((mp_limb_t)1)<<(2*depth1));

   mpir_fft_mulmod_2expp1(r, i1, i2, limbs, depth1, w1);

   return r[limbs];
}
int
main(void)
{
    mp_bitcnt_t depth, w;

    flint_rand_t state;

    printf("fft/ifft_mfa_truncate_sqrt2....");
    fflush(stdout);

    flint_randinit(state);
    _flint_rand_init_gmp(state);

    for (depth = 6; depth <= 13; depth++)
    {
        for (w = 1; w <= 5; w++)
        {
            mp_size_t n = (1UL<<depth);
            mp_size_t trunc = 2*n + n_randint(state, 2*n) + 1;
            mp_size_t n1 = (1UL<<(depth/2));
            mp_size_t limbs = (n*w)/GMP_LIMB_BITS;
            mp_size_t size = limbs + 1;
            mp_size_t i;
            mp_limb_t * ptr;
            mp_limb_t ** ii, ** jj, * t1, * t2, * s1;
        
            trunc = 2*n1*((trunc + 2*n1 - 1)/(2*n1));

            ii = flint_malloc((4*(n + n*size) + 3*size)*sizeof(mp_limb_t));
            for (i = 0, ptr = (mp_limb_t *) ii + 4*n; i < 4*n; i++, ptr += size) 
            {
                ii[i] = ptr;
                random_fermat(ii[i], state, limbs);
            }
            t1 = ptr;
            t2 = t1 + size;
            s1 = t2 + size;
   
            for (i = 0; i < 4*n; i++)
               mpn_normmod_2expp1(ii[i], limbs);
    
            jj = flint_malloc(4*(n + n*size)*sizeof(mp_limb_t));
            for (i = 0, ptr = (mp_limb_t *) jj + 4*n; i < 4*n; i++, ptr += size) 
            {
                jj[i] = ptr;
                mpn_copyi(jj[i], ii[i], size);
            }
   
            fft_mfa_truncate_sqrt2(ii, n, w, &t1, &t2, &s1, n1, trunc);
            ifft_mfa_truncate_sqrt2(ii, n, w, &t1, &t2, &s1, n1, trunc);
            for (i = 0; i < trunc; i++)
            {
                mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, depth + 2);
                mpn_normmod_2expp1(ii[i], limbs);
            }

            for (i = 0; i < trunc; i++)
            {
                if (mpn_cmp(ii[i], jj[i], size) != 0)
                {
                    printf("FAIL:\n");
                    printf("n = %ld, trunc = %ld\n", n, trunc);
                    printf("Error in entry %ld\n", i);
                    abort();
                }
            }

            flint_free(ii);
            flint_free(jj);
        }
    }

    flint_randclear(state);
    
    printf("PASS\n");
    return 0;
}
示例#5
0
/*
   Test program for _fmpz_vec_get_fft / _fmpz_vec_set_fft.

   Two passes of 10000 iterations each: a random fmpz vector is converted
   to FFT coefficient form and back, and the result must equal the input.
   The first pass uses signed random entries and normalises every
   coefficient before converting back; the second uses unsigned entries.
   Aborts with a dump of both vectors on the first mismatch.
*/
int
main(void)
{
    int i, result;
    flint_rand_t state;

    printf("get/set_fft....");
    fflush(stdout);

    flint_randinit(state);
    
     /* convert back and forth and compare */
    for (i = 0; i < 10000; i++)
    {
        fmpz * a, * b;
        mp_bitcnt_t bits;
        long len, limbs;
        mp_limb_t ** ii, * ptr;
        long j, bt; /* j: coefficient index (distinct from the outer counter i) */

        bits = n_randint(state, 300) + 1;
        len = n_randint(state, 300) + 1;
        limbs = 2*((bits - 1)/FLINT_BITS + 1);
        
        /* one allocation: len pointers followed by len coefficients */
        ii = flint_malloc((len + len*(limbs + 1))*sizeof(mp_limb_t));
        ptr = (mp_limb_t *) ii + len;
        for (j = 0; j < len; j++, ptr += (limbs + 1))
           ii[j] = ptr;

        a = _fmpz_vec_init(len);
        b = _fmpz_vec_init(len);
        _fmpz_vec_randtest(a, state, len, bits);

        bt = _fmpz_vec_get_fft(ii, a, limbs, len);
        for (j = 0; j < len; j++)
           mpn_normmod_2expp1(ii[j], limbs);
        _fmpz_vec_set_fft(b, len, ii, limbs, bt < 0);
        
        result = (_fmpz_vec_equal(a, b, len));
        if (!result)
        {
            printf("FAIL:\n");
            _fmpz_vec_print(a, len), printf("\n\n");
            _fmpz_vec_print(b, len), printf("\n\n");
            abort();
        }

        _fmpz_vec_clear(a, len);
        _fmpz_vec_clear(b, len);
        flint_free(ii); /* was leaked on every iteration */
    }
        
     /* convert back and forth unsigned and compare */
    for (i = 0; i < 10000; i++)
    {
        fmpz * a, * b;
        mp_bitcnt_t bits;
        long len, limbs;
        mp_limb_t ** ii, * ptr;
        long j, bt; /* j: coefficient index (distinct from the outer counter i) */

        bits = n_randint(state, 300) + 1;
        len = n_randint(state, 300) + 1;
        limbs = 2*((bits - 1)/FLINT_BITS + 1);
        
        /* one allocation: len pointers followed by len coefficients */
        ii = flint_malloc((len + len*(limbs + 1))*sizeof(mp_limb_t));
        ptr = (mp_limb_t *) ii + len;
        for (j = 0; j < len; j++, ptr += (limbs + 1))
           ii[j] = ptr;

        a = _fmpz_vec_init(len);
        b = _fmpz_vec_init(len);
        _fmpz_vec_randtest_unsigned(a, state, len, bits);

        bt = _fmpz_vec_get_fft(ii, a, limbs, len);
        _fmpz_vec_set_fft(b, len, ii, limbs, bt < 0);
        
        result = (_fmpz_vec_equal(a, b, len));
        if (!result)
        {
            printf("FAIL:\n");
            _fmpz_vec_print(a, len), printf("\n\n");
            _fmpz_vec_print(b, len), printf("\n\n");
            abort();
        }

        _fmpz_vec_clear(a, len);
        _fmpz_vec_clear(b, len);
        flint_free(ii); /* was leaked on every iteration */
    }
        
    flint_randclear(state);
    _fmpz_cleanup();
    printf("PASS\n");
    return 0;
}
示例#6
0
int
main(void)
{
    mp_bitcnt_t depth, w;
    
    flint_rand_t state;

    printf("fft/ifft_radix2....");
    fflush(stdout);

    flint_randinit(state);
    _flint_rand_init_gmp(state);

    for (depth = 6; depth <= 12; depth++)
    {
        for (w = 1; w <= 5; w++)
        {
            mp_size_t n = (1UL<<depth);
            mp_size_t limbs = (n*w)/GMP_LIMB_BITS;
            mp_size_t size = limbs + 1;
            mp_size_t i;
            mp_limb_t * ptr;
            mp_limb_t ** ii, ** jj, *t1, *t2;
        
            ii = flint_malloc((2*(n + n*size) + 2*size)*sizeof(mp_limb_t));
            for (i = 0, ptr = (mp_limb_t *) ii + 2*n; i < 2*n; i++, ptr += size) 
            {
                ii[i] = ptr;
                random_fermat(ii[i], state, limbs);
            }
            t1 = ptr;
            t2 = t1 + size;
   
            for (i = 0; i < 2*n; i++)
               mpn_normmod_2expp1(ii[i], limbs);
    
            jj = flint_malloc(2*(n + n*size)*sizeof(mp_limb_t));
            for (i = 0, ptr = (mp_limb_t *) jj + 2*n; i < 2*n; i++, ptr += size) 
            {
                jj[i] = ptr;
                mpn_copyi(jj[i], ii[i], size);
            }
   
            fft_radix2(ii, n, w, &t1, &t2);
            ifft_radix2(ii, n, w, &t1, &t2);
            for (i = 0; i < 2*n; i++)
            {
                mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, depth + 1);
                mpn_normmod_2expp1(ii[i], limbs);
            }

            for (i = 0; i < 2*n; i++)
            {
                if (mpn_cmp(ii[i], jj[i], size) != 0)
                {
                    printf("FAIL:\n");
                    printf("Error in entry %ld\n", i);
                    abort();
                }
            }

            flint_free(ii);
            flint_free(jj);
        }
    }

    flint_randclear(state);
    
    printf("PASS\n");
    return 0;
}
示例#7
0
/*
   FFT-based product of i1 and i2, written to r1 (r_limbs + 1 limbs).

   NOTE(review): from the name and the final mpn_normmod_2expp1 call this is
   presumably the product modulo 2^(r_limbs*GMP_LIMB_BITS) + 1 - confirm
   against the callers.

   r_limbs  size of the operands and result, excluding the extra top limb.
   depth    the transforms have 2n coefficients with n = 2^depth.
   w        each coefficient has limbs = n*w/GMP_LIMB_BITS limbs, plus one.

   Each operand is cut into pieces of bits1 = r_limbs*GMP_LIMB_BITS/(2n)
   bits by mpir_fft_split_bits.  When i1 == i2 the second operand is not
   split or transformed; ii is used for both factors.

   All working storage comes from the TMP allocator and is released by
   TMP_FREE before returning.
*/
void mpir_fft_mulmod_2expp1(mp_ptr r1, mp_srcptr i1, mp_srcptr i2, 
                 mp_size_t r_limbs, mp_bitcnt_t depth, mp_bitcnt_t w)
{
   mp_size_t n = (((mp_size_t)1)<<depth);
   mp_bitcnt_t bits1 = (r_limbs*GMP_LIMB_BITS)/(2*n);
   
   mp_size_t limb_add, limbs = (n*w)/GMP_LIMB_BITS;
   mp_size_t size = limbs + 1;
   mp_size_t i, j, ll;

   mp_limb_t * ptr;
   mp_limb_t ** ii, ** jj, *tt, *t1, *t2, *s1, *r, *ii0, *jj0;
   mp_limb_t c;
   TMP_DECL;

   TMP_MARK;
   /*
      Layout of the first allocation, in order:
        2n pointers | 2n coefficients of size limbs | ii0 (2n limbs) |
        t1, t2, s1 (size limbs each) | r (2n limbs) | tt (the remainder)
   */
   ii = TMP_BALLOC_MP_PTRS(2*(n + n*size) + 4*n + 5*size);
   for (i = 0, ptr = (mp_ptr) ii + 2*n; i < 2*n; i++, ptr += size) 
   {
      ii[i] = ptr;
   }
   ii0 = ptr;
   t1 = ii0 + 2*n;
   t2 = t1 + size;
   s1 = t2 + size;
   r = s1 + size;
   tt = r + 2*n;
   
   if (i1 != i2)
   {
      /* second operand: 2n pointers | 2n coefficients | jj0 (2n limbs) */
      jj = TMP_BALLOC_MP_PTRS(2*(n + n*size) + 2*n);
      for (i = 0, ptr = (mp_ptr) jj + 2*n; i < 2*n; i++, ptr += size) 
      {
         jj[i] = ptr;
      }
      jj0 = ptr;
   } else
   {
      /* squaring: share the coefficient array and the saved low limbs */
      jj = ii;
      jj0 = ii0;
   }

   /* split i1 into coefficients; zero any coefficients not filled */
   j = mpir_fft_split_bits(ii, i1, r_limbs, bits1, limbs);
   for ( ; j < 2*n; j++)
      mpn_zero(ii[j], limbs + 1);

   /* remember the low limb of every coefficient before it is transformed */
   for (i = 0; i < 2*n; i++)
      ii0[i] = ii[i][0];
 
   mpir_fft_negacyclic(ii, n, w, &t1, &t2, &s1);
   for (j = 0; j < 2*n; j++)
      mpn_normmod_2expp1(ii[j], limbs);

   if (i1 != i2)
   {
      /* same preparation for the second operand */
      j = mpir_fft_split_bits(jj, i2, r_limbs, bits1, limbs);
      for ( ; j < 2*n; j++)
          mpn_zero(jj[j], limbs + 1);

      for (i = 0; i < 2*n; i++)
         jj0[i] = jj[i][0];

      mpir_fft_negacyclic(jj, n, w, &t1, &t2, &s1);
   }
      
   /* pointwise products; c packs the two top limbs as 2*ii_top + jj_top */
   for (j = 0; j < 2*n; j++)
   {
      if (i1 != i2) mpn_normmod_2expp1(jj[j], limbs);
      c = 2*ii[j][limbs] + jj[j][limbs];

      ii[j][limbs] = mpn_mulmod_2expp1_basecase(ii[j], ii[j], jj[j], c, n*w, tt);
   }
   
   mpir_ifft_negacyclic(ii, n, w, &t1, &t2, &s1);
   
   /*
      Convolve the saved low limbs into r (2n limbs).
      NOTE(review): presumably a one-limb negacyclic convolution used to
      recover information lost modulo the coefficient ring - confirm.
   */
   mpir_fft_naive_convolution_1(r, ii0, jj0, 2*n);

   for (j = 0; j < 2*n; j++)
   {
      mp_limb_t t, cy2;
      
      /* remove the scaling left by the transform pair */
      mpn_div_2expmod_2expp1(ii[j], ii[j], limbs, depth + 1);
      mpn_normmod_2expp1(ii[j], limbs);

      /*
         Add (r[j] - low limb) to the coefficient so that its low limb
         becomes r[j]; the difference is parked in the top limb, whose
         previous value t is added back afterwards.  r[j] is then reused
         for the carry out of the top limb (from add_ssaaaa and cy2).
      */
      t = ii[j][limbs];
      ii[j][limbs] = r[j] - ii[j][0];
      cy2 = mpn_add_1(ii[j], ii[j], limbs + 1, ii[j][limbs]);
      add_ssaaaa(r[j], ii[j][limbs], 0, ii[j][limbs], 0, t);
      if (cy2) r[j]++;
   }
   
   /* recombine the first 2n - 1 coefficients; the last is handled below */
   mpn_zero(r1, r_limbs + 1);
   mpir_fft_combine_bits(r1, ii, 2*n - 1, bits1, limbs + 1, r_limbs + 1);
   
   /* 
      as the negacyclic convolution has effectively done subtractions
      some of the coefficients will be negative, so need to subtract p
   */
   ll = 0;
   limb_add = bits1/GMP_LIMB_BITS; /* limb offset between coefficients */
   
   for (j = 0; j < 2*n - 2; j++)
   {   
      if (r[j]) 
         mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);
      else if ((mp_limb_signed_t) ii[j][limbs] < 0) /* coefficient was -ve */
      {
         mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);
         mpn_sub_1(r1 + ll + limbs + 1, r1 + ll + limbs + 1, r_limbs - limbs - ll, 1);
      }

      ll += limb_add;
   }
   /* penultimate coefficient, top bit was already ignored */
   if (r[j] || (mp_limb_signed_t) ii[j][limbs] < 0) /* coefficient was -ve */
      mpn_sub_1(r1 + ll + 1, r1 + ll + 1, r_limbs - ll, 1);
   
   /* final coefficient wraps around */
   /* its low limb_add limbs are added at the top of r1 ... */
   if (limb_add)
      r1[r_limbs] += mpn_add_n(r1 + r_limbs - limb_add, r1 + r_limbs - limb_add, ii[2*n - 1], limb_add);
   /* ... and the rest is subtracted from the bottom, propagating the borrow */
   c = mpn_sub_n(r1, r1, ii[2*n - 1] + limb_add, limbs + 1 - limb_add);
   mpn_addmod_2expp1_1(r1 + limbs + 1 - limb_add, r_limbs - limbs - 1 + limb_add, -c);
   mpn_normmod_2expp1(r1, r_limbs);
   
   TMP_FREE;
}
示例#8
0
/*
   Test program for mpn_normmod_2expp1.

   For every combination of bits (multiples of FLINT_BITS below
   32*FLINT_BITS), j in 1..31 and k a power of two up to GMP_NUMB_BITS,
   sets n = bits/k and w = j*k, generates a random value of
   limbs = n*w/GMP_LIMB_BITS limbs (plus one), and checks that normalising
   it with mpn_normmod_2expp1 agrees with reducing it modulo the value
   produced by set_p(p, n, w) using mpz arithmetic.
*/
int
main(void)
{
    mp_bitcnt_t bits;
    mp_size_t j, k;
    mpz_t p, m1, m2;

    FLINT_TEST_INIT(state);

    flint_printf("normmod_2expp1....");
    fflush(stdout);

    
    _flint_rand_init_gmp(state);

    mpz_init(m1);
    mpz_init(m2);
    mpz_init(p);

    /* normalisation mod p = 2^wn + 1 where B divides nw and n is a power of 2 */
    for (bits = FLINT_BITS; bits < 32*FLINT_BITS; bits += FLINT_BITS)
    {
        for (j = 1; j < 32; j++)
        {
            for (k = 1; k <= GMP_NUMB_BITS; k <<= 1)
            {
                mp_size_t n = bits/k;
                mp_size_t w = j*k;
                mp_size_t limbs = (n*w)/GMP_LIMB_BITS;
                mp_limb_t * nn = flint_malloc((limbs + 1)*sizeof(mp_limb_t));

                /* m1: the raw random value, reduced mod p further down */
                random_fermat(nn, state, limbs);
                fermat_to_mpz(m1, nn, limbs);
                set_p(p, n, w);

                /* m2: the value after in-place normalisation */
                mpn_normmod_2expp1(nn, limbs);
                fermat_to_mpz(m2, nn, limbs);
                mpz_mod(m1, m1, p);

                if (mpz_cmp(m1, m2) != 0)
                {
                    flint_printf("FAIL:\n");
                    flint_printf("mpn_normmod_2expp1 error\n");
                    gmp_printf("want %Zx\n\n", m1);
                    gmp_printf("got  %Zx\n", m2);
                    abort();
                }

                flint_free(nn);
            }
        }
    }

    mpz_clear(m2);
    mpz_clear(m1);
    mpz_clear(p);

    FLINT_TEST_CLEANUP(state);

    flint_printf("PASS\n");
    return 0;
}
示例#9
0
int
main(void)
{
    mp_bitcnt_t depth, w;

    gmp_randstate_t state;

    tests_start();
    fflush(stdout);

    gmp_randinit_default(state);

    for (depth = 6; depth <= 12; depth++)
    {
        for (w = 1; w <= 5; w++)
        {
            mp_size_t n = (((mp_limb_t)1)<<depth);
            mp_size_t limbs = (n*w)/GMP_LIMB_BITS;
            mp_size_t size = limbs + 1;
            mp_size_t i;
            mp_limb_t * ptr;
            mp_limb_t ** ii, ** jj, * t1, * t2, * s1;

            ii = malloc((2*(n + n*size) + 3*size)*sizeof(mp_limb_t));
            for (i = 0, ptr = (mp_limb_t *) ii + 2*n; i < 2*n; i++, ptr += size)
            {
                ii[i] = ptr;
                mpir_random_fermat(ii[i], state, limbs);
            }
            t1 = ptr;
            t2 = t1 + size;
            s1 = t2 + size;

            for (i = 0; i < 2*n; i++)
                mpn_normmod_2expp1(ii[i], limbs);

            jj = malloc(2*(n + n*size)*sizeof(mp_limb_t));
            for (i = 0, ptr = (mp_limb_t *) jj + 2*n; i < 2*n; i++, ptr += size)
            {
                jj[i] = ptr;
                mpn_copyi(jj[i], ii[i], size);
            }

            mpir_fft_negacyclic(ii, n, w, &t1, &t2, &s1);
            mpir_ifft_negacyclic(ii, n, w, &t1, &t2, &s1);
            for (i = 0; i < 2*n; i++)
            {
                mpn_div_2expmod_2expp1(ii[i], ii[i], limbs, depth + 1);
                mpn_normmod_2expp1(ii[i], limbs);
            }

            for (i = 0; i < 2*n; i++)
            {
                if (mpn_cmp(ii[i], jj[i], size) != 0)
                {
                    printf("FAIL:\n");
                    printf("Error in entry %ld\n", i);
                    abort();
                }
            }

            free(ii);
            free(jj);
        }
    }

    gmp_randclear(state);

    tests_end();
    return 0;
}
示例#10
0
/*
   Test program for fft_mulmod_2expp1.

   For 100 iterations, depth 6..18 and w 1..2, multiplies two random
   normalised operands of n*w bits (n = 2^depth) with fft_mulmod_2expp1 and
   compares every limb, and the returned top limb, against
   mpn_mulmod_2expp1.  A second pass repeats the check for squaring.

   The carry word handed to the reference routine is
   2*top(first operand) + top(second operand), the same packing that the
   library code uses when it calls the basecase routine.  The previous
   code had the two weights swapped, which would select the wrong operand
   if exactly one input happened to have its top limb set.
*/
int
main(void)
{
    mp_bitcnt_t depth, w;
    int iters;

    flint_rand_t state;

    printf("mulmod_2expp1....");
    fflush(stdout);

    flint_randinit(state);
    _flint_rand_init_gmp(state);

    for (iters = 0; iters < 100; iters++)
    {
        for (depth = 6; depth <= 18; depth++)
        {
            for (w = 1; w <= 2; w++)
            {
                mp_size_t n = (1UL<<depth);
                mp_bitcnt_t bits = n*w;
                mp_size_t int_limbs = bits/FLINT_BITS;
                mp_size_t j;
                mp_limb_t c, * i1, * i2, * r1, * r2, * tt;
        
                /* i1, i2, r1, r2: int_limbs + 1 limbs each; tt: the rest */
                i1 = flint_malloc(6*(int_limbs+1)*sizeof(mp_limb_t));
                i2 = i1 + int_limbs + 1;
                r1 = i2 + int_limbs + 1;
                r2 = r1 + int_limbs + 1;
                tt = r2 + int_limbs + 1;

                random_fermat(i1, state, int_limbs);
                random_fermat(i2, state, int_limbs);
                mpn_normmod_2expp1(i1, int_limbs);
                mpn_normmod_2expp1(i2, int_limbs);

                fft_mulmod_2expp1(r2, i1, i2, n, w, tt);
                /* bit 1: top limb of first operand, bit 0: of the second */
                c = 2*i1[int_limbs] + i2[int_limbs];
                c = mpn_mulmod_2expp1(r1, i1, i2, c, int_limbs*FLINT_BITS, tt);
            
                for (j = 0; j < int_limbs; j++)
                {
                    if (r1[j] != r2[j]) 
                    {
                        printf("error in limb %ld, %lx != %lx\n", j, r1[j], r2[j]);
                        abort();
                    }
                }

                /* j == int_limbs here: compare the returned top limb */
                if (c != r2[int_limbs])
                {
                    printf("error in limb %ld, %lx != %lx\n", j, c, r2[j]);
                    abort();
                }

                flint_free(i1);
            }
        }
    }
    
    /* test squaring */
    for (iters = 0; iters < 100; iters++)
    {
        for (depth = 6; depth <= 18; depth++)
        {
            for (w = 1; w <= 2; w++)
            {
                mp_size_t n = (1UL<<depth);
                mp_bitcnt_t bits = n*w;
                mp_size_t int_limbs = bits/FLINT_BITS;
                mp_size_t j;
                mp_limb_t c, * i1, * r1, * r2, * tt;
        
                /* i1, r1, r2: int_limbs + 1 limbs each; tt: the rest */
                i1 = flint_malloc(5*(int_limbs+1)*sizeof(mp_limb_t));
                r1 = i1 + int_limbs + 1;
                r2 = r1 + int_limbs + 1;
                tt = r2 + int_limbs + 1;

                random_fermat(i1, state, int_limbs);
                mpn_normmod_2expp1(i1, int_limbs);
                
                fft_mulmod_2expp1(r2, i1, i1, n, w, tt);
                /* both operands are i1, so both bits carry the same flag */
                c = 2*i1[int_limbs] + i1[int_limbs];
                c = mpn_mulmod_2expp1(r1, i1, i1, c, int_limbs*FLINT_BITS, tt);
            
                for (j = 0; j < int_limbs; j++)
                {
                    if (r1[j] != r2[j]) 
                    {
                        printf("error in limb %ld, %lx != %lx\n", j, r1[j], r2[j]);
                        abort();
                    }
                }

                /* j == int_limbs here: compare the returned top limb */
                if (c != r2[int_limbs])
                {
                    printf("error in limb %ld, %lx != %lx\n", j, c, r2[j]);
                    abort();
                }

                flint_free(i1);
            }
        }
    }
    
    flint_randclear(state);
    
    printf("PASS\n");
    return 0;
}