コード例 #1
0
ファイル: mulmod_bexpp1.c プロジェクト: BrianGladman/mpir
int mpn_mulmod_Bexpp1(mp_ptr r, mp_srcptr i1, mp_srcptr i2, mp_size_t limbs, mp_ptr tt)
{
   mp_size_t bits = limbs * GMP_LIMB_BITS;
   mp_bitcnt_t depth1, depth = 1;
   mp_size_t w1, off;

   mp_limb_t c = 2 * i1[limbs] + i2[limbs];
   
   if (c & 1)
   {
      mpn_neg_n(r, i1, limbs + 1);
      mpn_normmod_2expp1(r, limbs);
      return 0;
   } else if (c & 2)
   {
      mpn_neg_n(r, i2, limbs + 1);
      mpn_normmod_2expp1(r, limbs);
      return 0;
   }

   if (limbs <= FFT_MULMOD_2EXPP1_CUTOFF) 
   {
       if(bits)
          r[limbs] = mpn_mulmod_2expp1_basecase(r, i1, i2, c, bits, tt);
       else
          r[limbs] = 0;
       return r[limbs];
   }
   while ((((mp_limb_t)1)<<depth) < bits) depth++;
   
   if (depth < 12) off = mulmod_2expp1_table_n[0];
   else off = mulmod_2expp1_table_n[MIN(depth, FFT_N_NUM + 11) - 12];
   depth1 = depth/2 - off;
   
   w1 = bits/(((mp_limb_t)1)<<(2*depth1));

   mpir_fft_mulmod_2expp1(r, i1, i2, limbs, depth1, w1);

   return r[limbs];
}
コード例 #2
0
/*
   ret + (xp, n) = (yp, n)*(zp, n) % 2^b + 1  
   needs (tp, 2n) temp space, everything reduced mod 2^b 
   inputs, outputs are fully reduced
  
   N.B: 2n is not the same as 2b rounded up to nearest limb!
*/
inline static int
mpn_mulmod_2expp1_internal (mp_ptr xp, mp_srcptr yp, mp_srcptr zp,
			                                         mpir_ui b, mp_ptr tp)
{
  mp_size_t n, k;
  mp_limb_t c;

  TMP_DECL;

  n = BITS_TO_LIMBS (b);
  k = GMP_NUMB_BITS * n - b;

  ASSERT(b > 0);
  ASSERT(n > 0);
  ASSERT_MPN(yp, n);
  ASSERT_MPN(zp, n);
  ASSERT(!MPN_OVERLAP_P (tp, 2 * n, yp, n));
  ASSERT(!MPN_OVERLAP_P (tp, 2 * n, zp, n));
  ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp, n));
  ASSERT(MPN_SAME_OR_SEPARATE_P (xp, tp + n, n));
  ASSERT(k == 0 || yp[n - 1] >> (GMP_NUMB_BITS - k) == 0);
  ASSERT(k == 0 || zp[n - 1] >> (GMP_NUMB_BITS - k) == 0);

#ifndef TUNE_PROGRAM_BUILD
  if (k == 0 && n > FFT_MULMOD_2EXPP1_CUTOFF && n == mpir_fft_adjust_limbs(n))
  {
      mp_bitcnt_t depth1, depth = 1;
      mp_size_t w1, off;
      mp_ptr tx, ty, tz;
      mp_limb_t ret;

      TMP_MARK;

      tx = TMP_BALLOC_LIMBS(3*n + 3);
      ty = tx + n + 1;
      tz = ty + n + 1;

      MPN_COPY(ty, yp, n);
      MPN_COPY(tz, zp, n);
      ty[n] = 0;
      tz[n] = 0;

      while ((((mp_limb_t)1)<<depth) < b) depth++;
   
      if (depth < 12) off = mulmod_2expp1_table_n[0];
      else off = mulmod_2expp1_table_n[MIN(depth, FFT_N_NUM + 11) - 12];
      depth1 = depth/2 - off;
   
      w1 = b/(((mp_limb_t)1)<<(2*depth1));

      mpir_fft_mulmod_2expp1(tx, ty, tz, n, depth1, w1);

      MPN_COPY(xp, tx, n);
      ret = tx[n];
      
      TMP_FREE;

	   return ret;
  }
#endif

  if (yp == zp)
     mpn_sqr(tp, yp, n);
  else
     mpn_mul_n (tp, yp, zp, n);

  if (k == 0)
    {
      c = mpn_sub_n (xp, tp, tp + n, n);

      return mpn_add_1 (xp, xp, n, c);
    }

  c = tp[n - 1];
  tp[n - 1] &= GMP_NUMB_MASK >> k;

#if HAVE_NATIVE_mpn_sublsh_nc
  c = mpn_sublsh_nc (xp, tp, tp + n, n, k, c);
#else
  {
    mp_limb_t c1;
    c1 = mpn_lshift (tp + n, tp + n, n, k);
    tp[n] |= c >> (GMP_NUMB_BITS - k);
    c = mpn_sub_n (xp, tp, tp + n, n) + c1;
  }
#endif

  c = mpn_add_1 (xp, xp, n, c);
  xp[n - 1] &= GMP_NUMB_MASK >> k;

  return c;
}
コード例 #3
0
ファイル: tune-fft.c プロジェクト: Masuzu/RumourPropagation
int
main(void)
{
    mp_bitcnt_t depth, w, depth1, w1;
    clock_t start, end;
    double elapsed;
    double best = 0.0;
    mp_size_t best_off, off, best_d, best_w;

    gmp_randstate_t state;

    printf("/* fft_tuning.h -- autogenerated by tune-fft */\n\n");
    printf("#ifndef FFT_TUNING_H\n");
    printf("#define FFT_TUNING_H\n\n");
    printf("#include \"mpir.h\"\n\n");
    printf("#define FFT_TAB \\\n");
    fflush(stdout);

    gmp_randinit_default(state);

    printf("   { "); fflush(stdout);
    for (depth = 6; depth <= 10; depth++)
    {
        printf("{ "); fflush(stdout);
        for (w = 1; w <= 2; w++)
        {
            int iters = 100*((mp_size_t) 1 << (3*(10 - depth)/2)), i;
            
            mp_size_t n = ((mp_limb_t)1<<depth);
            mp_bitcnt_t bits1 = (n*w - (depth + 1))/2; 
            mp_size_t len1 = 2*n;
            mp_size_t len2 = 2*n;

            mp_bitcnt_t b1 = len1*bits1, b2 = len2*bits1;
            mp_size_t n1, n2;
            mp_size_t j;
            mp_limb_t * i1, *i2, *r1;
   
            n1 = (b1 - 1)/GMP_LIMB_BITS + 1;
            n2 = (b2 - 1)/GMP_LIMB_BITS + 1;
                    
            i1 = malloc(2*(n1 + n2)*sizeof(mp_limb_t));
            i2 = i1 + n1;
            r1 = i2 + n2;
   
            mpn_urandomb(i1, state, b1);
            mpn_urandomb(i2, state, b2);
  
            best_off = -1;
            
            for (off = 0; off <= 4; off++)
            {
               start = clock();
               for (i = 0; i < iters; i++)
                  mpn_mul_trunc_sqrt2(r1, i1, n1, i2, n2, depth - off, w*((mp_size_t)1 << (off*2)));
               end = clock();
               
               elapsed = ((double) (end - start)) / CLOCKS_PER_SEC;
               
               if (elapsed < best || best_off == -1)
               {
                  best_off = off;
                  best = elapsed;
               }
            }
           
            printf("%ld", best_off); 
            if (w != 2) printf(",");
            printf(" "); fflush(stdout);

            free(i1);
        }
        printf("}");
        if (depth != 10) printf(",");
        printf(" "); fflush(stdout);
    }

    printf("}\n\n");
    
    best_d = 12;
    best_w = 1;
    best_off = -1;

    printf("#define MULMOD_TAB \\\n");
    fflush(stdout);
    printf("   { "); fflush(stdout);
    for (depth = 12; best_off != 1 ; depth++)
    {
        for (w = 1; w <= 2; w++)
        {
            int iters = 100*((mp_size_t) 1 << (3*(18 - depth)/2)), i;
            mp_size_t n = ((mp_limb_t)1<<depth);
            mp_bitcnt_t bits = n*w;
            mp_size_t int_limbs = (bits - 1)/GMP_LIMB_BITS + 1;
            mp_size_t j;
            mp_limb_t c, * i1, * i2, * r1, * tt;
        
            if (depth <= 21) iters = 32*((mp_size_t) 1 << (21 - depth));
            else iters = MAX(32/((mp_size_t) 1 << (depth - 21)), 1);

            i1 = malloc(6*(int_limbs+1)*sizeof(mp_limb_t));
            i2 = i1 + int_limbs + 1;
            r1 = i2 + int_limbs + 1;
            tt = r1 + 2*(int_limbs + 1);
                
            mpn_urandomb(i1, state, int_limbs*GMP_LIMB_BITS);
            mpn_urandomb(i2, state, int_limbs*GMP_LIMB_BITS);
            i1[int_limbs] = 0;
            i2[int_limbs] = 0;

            depth1 = 1;
            while ((((mp_limb_t)1)<<depth1) < bits) depth1++;
            depth1 = depth1/2;

            w1 = bits/(((mp_limb_t)1)<<(2*depth1));

            best_off = -1;
            
            for (off = 0; off <= 4; off++)
            {
               start = clock();
               for (i = 0; i < iters; i++)
                  mpir_fft_mulmod_2expp1(r1, i1, i2, int_limbs, depth1 - off, w1*((mp_size_t)1 << (off*2)));
               end = clock();
               
               elapsed = ((double) (end - start)) / CLOCKS_PER_SEC;
               
               if (best_off == -1 || elapsed < best)
               {
                  best_off = off;
                  best = elapsed;
               }
            }
           
            start = clock();
            for (i = 0; i < iters; i++)
                mpn_mulmod_2expp1_basecase(r1, i1, i2, 0, bits, tt);
            end = clock();
               
            elapsed = ((double) (end - start)) / CLOCKS_PER_SEC;
            if (elapsed < best)
            {
                best_d = depth + (w == 2);
                best_w = w + 1 - 2*(w == 2);
            }

            printf("%ld", best_off); 
            if (w != 2) printf(", "); fflush(stdout);

            free(i1);
        }
        printf(", "); fflush(stdout);
    }
    printf("1 }\n\n");
    
    printf("#define FFT_N_NUM %ld\n\n", 2*(depth - 12) + 1);
    
    printf("#define FFT_MULMOD_2EXPP1_CUTOFF %ld\n\n", ((mp_limb_t) 1 << best_d)*best_w/(2*GMP_LIMB_BITS));
    
    gmp_randclear(state);
    
    printf("#endif\n");
    return 0;
}