// Constructs the per-modulus information record for the single-precision
// modulus NewP.
//
//   NewP    - the modulus; rejected unless > 1 and NumBits(NewP) <= NTL_SP_NBITS.
//   maxroot - caller-supplied cap applied to MaxRoot; must be non-negative.
//
// The constructor selects FFT primes until their product exceeds
// NewP^2 * 2^(maxroot + NTL_FFTFudge), then fills in the per-prime tables
// (CoeffModP, x, u) together with MinusMModP.
zz_pInfoT::zz_pInfoT(long NewP, long maxroot)
{
   // Argument validation (messages are part of the observable behaviour).
   if (maxroot < 0) LogicError("zz_pContext: maxroot may not be negative");
   if (NewP <= 1) LogicError("zz_pContext: p must be > 1");
   if (NumBits(NewP) > NTL_SP_NBITS) ResourceError("zz_pContext: modulus too big");

   p = NewP;
   pinv = 1/double(p);
   p_info = 0;

   // bound = p^2 shifted left by (maxroot + NTL_FFTFudge) bits.
   ZZ modulusZZ, bound;
   conv(modulusZZ, p);
   sqr(bound, modulusZZ);
   LeftShift(bound, bound, maxroot+NTL_FFTFudge);

   // Accumulate FFT primes into primeProduct until it exceeds bound.
   // Each prime is registered via UseFFTPrime before it is read back.
   ZZ primeProduct;
   set(primeProduct);

   long primeCount;
   long fftPrime;   // after the loop: the last prime that was multiplied in
   for (primeCount = 0; primeProduct <= bound; primeCount++) {
      UseFFTPrime(primeCount);
      fftPrime = GetFFTPrime(primeCount);
      mul(primeProduct, primeProduct, fftPrime);
   }

   if (primeCount > 4) LogicError("zz_pInit: too many primes");

   NumPrimes = primeCount;
   PrimeCnt = primeCount;

   // MaxRoot is derived from the last prime, then capped by maxroot.
   MaxRoot = CalcMaxRoot(fftPrime);
   if (maxroot < MaxRoot)
      MaxRoot = maxroot;

   // MinusMModP = (-primeProduct) rem p
   ZZ negProduct;
   negate(negProduct, primeProduct);
   MinusMModP = rem(negProduct, p);

   CoeffModP.SetLength(primeCount);
   x.SetLength(primeCount);
   u.SetLength(primeCount);

   // Per-prime tables.  For prime q_k:
   //   cofactor = primeProduct / q_k
   //   inverse  = (cofactor rem q_k)^{-1} mod q_k
   ZZ cofactor;
   for (long k = 0; k < primeCount; k++) {
      fftPrime = GetFFTPrime(k);

      div(cofactor, primeProduct, fftPrime);
      long inverse = InvMod(rem(cofactor, fftPrime), fftPrime);

      // With the quick-CRT setting the inverse is folded into the
      // coefficient before it is reduced mod p.
      if (NTL_zz_p_QUICK_CRT) mul(cofactor, cofactor, inverse);

      CoeffModP[k] = rem(cofactor, p);
      x[k] = ((double) inverse)/((double) fftPrime);
      u[k] = inverse;
   }
}
Esempio n. 2
0
long GetPrimeNumber(long bound, ZZ &prod){

	long nprimes;
	zz_pBak bak;
	bak.save();
	for (nprimes = 0; NumBits(prod) <= bound; nprimes++) {
		UseFFTPrime(nprimes);
		      mul(prod, prod, GetFFTPrime(nprimes));
   }
	bak.restore();
	return nprimes;
}
Esempio n. 3
0
// Makes sure the FFT/CRT information attached to the current ZZ_pInfo is
// built, then installs a fresh ZZ_pTmpSpaceT (with its crt/rem temporary
// vectors fetched from that information) as ZZ_pTmpSpace.
//
// The FFT information is constructed lazily through a Lazy<>::Builder;
// the do { ... } while (0) wrapper exists only so that `break` can skip
// the construction.
void ZZ_p::DoInstall()
{
   // Stays null unless this call is the one that builds FFTInfo below.
   SmartPtr<ZZ_pTmpSpaceT> tmps = 0;

   do { // NOTE: thread safe lazy init 
      Lazy<ZZ_pFFTInfoT>::Builder builder(ZZ_pInfo->FFTInfo);
      // A false result skips construction (presumably because FFTInfo
      // has already been built -- confirm against Lazy<>::Builder).
      if (!builder()) break;

      UniquePtr<ZZ_pFFTInfoT> FFTInfo;
      FFTInfo.make();

      ZZ B, M, M1, M2, M3;
      long n, i;
      long q, t;
      mulmod_t qinv;

      // B = p^2 shifted left by (NTL_FFTMaxRoot + NTL_FFTFudge) bits:
      // the bound that the product of FFT primes must exceed.
      sqr(B, ZZ_pInfo->p);

      LeftShift(B, B, NTL_FFTMaxRoot+NTL_FFTFudge);

      // FIXME: the following is quadratic time...would
      // be nice to get a faster solution...
      // One could estimate the # of primes by summing logs,
      // then multiply using a tree-based multiply, then 
      // adjust up or down...

      // Assuming IEEE floating point, the worst case estimate
      // for error guarantees a correct answer +/- 1 for
      // numprimes up to 2^25...for sure we won't be
      // using that many primes...we can certainly put in 
      // a sanity check, though. 

      // If I want a more accurate summation (without using Kahan,
      // which has some portability issues), I could represent 
      // numbers as x = a + f, where a is integer and f is the fractional
      // part.  Summing in this representation introduces an *absolute*
      // error of 2 epsilon n, which is just as good as Kahan 
      // for this application.

      // same strategy could also be used in the ZZX HomMul routine,
      // if we ever want to make that subquadratic

      // Multiply FFT primes into M (starting from 1) until M > B.
      // n ends up as the number of primes used, q as the last one.
      set(M);
      n = 0;
      while (M <= B) {
         UseFFTPrime(n);
         q = GetFFTPrime(n);
         n++;
         mul(M, M, q);
      }

      FFTInfo->NumPrimes = n;
      FFTInfo->MaxRoot = CalcMaxRoot(q);


      double fn = double(n);

      // Reject moduli that need so many primes that 8*n*(n+48)
      // exceeds NTL_FDOUBLE_PRECISION.
      if (8.0*fn*(fn+48) > NTL_FDOUBLE_PRECISION)
         ResourceError("modulus too big");


      // QuickCRT is enabled only when the same quantity also fits
      // after dividing the precision budget by NTL_SP_BOUND.
      if (8.0*fn*(fn+48) <= NTL_FDOUBLE_PRECISION/double(NTL_SP_BOUND))
         FFTInfo->QuickCRT = true;
      else
         FFTInfo->QuickCRT = false;
      
      // FIXME: some of this stuff does not need to be initialized
      // at all if FFTInfo->crt_struct.special()

      FFTInfo->x.SetLength(n);
      FFTInfo->u.SetLength(n);
      FFTInfo->uqinv.SetLength(n);

      FFTInfo->rem_struct.init(n, ZZ_pInfo->p, GetFFTPrime);

      FFTInfo->crt_struct.init(n, ZZ_pInfo->p, GetFFTPrime);

      // The per-prime tables below are only filled in when the CRT
      // structure is not "special".
      if (!FFTInfo->crt_struct.special()) {
         ZZ qq, rr;

         // M = qq*p + rr
         DivRem(qq, rr, M, ZZ_pInfo->p);

         // MinusMModP = (-rr) mod p, i.e. (-M) mod p
         NegateMod(FFTInfo->MinusMModP, rr, ZZ_pInfo->p);

         for (i = 0; i < n; i++) {
            q = GetFFTPrime(i);
            qinv = GetFFTPrimeInv(i);

            // tt = qq mod q, so p*tt + rr is a small stand-in for M
            // in the division by q below.
            long tt = rem(qq, q);

            mul(M2, ZZ_pInfo->p, tt);
            add(M2, M2, rr); 
            div(M2, M2, q);  // = (M/q) rem p
            

            // t = ((M/q) mod q)^{-1} mod q
            div(M1, M, q);
            t = rem(M1, q);
            t = InvMod(t, q);

            // M3 = ((M/q) rem p) * t, reduced mod p: the i-th CRT coefficient.
            mul(M3, M2, t);
            rem(M3, M3, ZZ_pInfo->p);

            FFTInfo->crt_struct.insert(i, M3);


            // x[i] is t/q as a double; u[i] is t itself; uqinv[i] is
            // the value PrepMulModPrecon derives from u[i], q and qinv.
            FFTInfo->x[i] = ((double) t)/((double) q);
            FFTInfo->u[i] = t;
            FFTInfo->uqinv[i] = PrepMulModPrecon(FFTInfo->u[i], q, qinv);
         }
      }

      // Build the temporary space from the local FFTInfo before
      // ownership of it is handed to the builder.
      tmps = MakeSmart<ZZ_pTmpSpaceT>();
      tmps->crt_tmp_vec.fetch(FFTInfo->crt_struct);
      tmps->rem_tmp_vec.fetch(FFTInfo->rem_struct);

      builder.move(FFTInfo);
   } while (0);

   // Construction was skipped above: build the temporary space from the
   // FFTInfo already stored in ZZ_pInfo.
   if (!tmps) {
      const ZZ_pFFTInfoT *FFTInfo = ZZ_pInfo->FFTInfo.get();
      tmps = MakeSmart<ZZ_pTmpSpaceT>();
      tmps->crt_tmp_vec.fetch(FFTInfo->crt_struct);
      tmps->rem_tmp_vec.fetch(FFTInfo->rem_struct);
   }

   ZZ_pTmpSpace = tmps;
}
Esempio n. 4
0
int main()
{

#ifdef NTL_SPMM_ULL

   if (sizeof(NTL_ULL_TYPE) < 2*sizeof(long)) {
      printf("999999999999999 ");
      print_flag();
      return 0;
   }

#endif


   long n, k;

   n = 200;
   k = 10*NTL_ZZ_NBITS;

   ZZ p;

   RandomLen(p, k);


   ZZ_p::init(p);         // initialization

   ZZ_pX f, g, h, r1, r2, r3;

   random(g, n);    // g = random polynomial of degree < n
   random(h, n);    // h =             "   "
   random(f, n);    // f =             "   "

   SetCoeff(f, n);  // Sets coefficient of X^n to 1
   

   // For doing arithmetic mod f quickly, one must pre-compute
   // some information.

   ZZ_pXModulus F;
   build(F, f);

   PlainMul(r1, g, h);  // this uses classical arithmetic
   PlainRem(r1, r1, f);

   MulMod(r2, g, h, F);  // this uses the FFT

   MulMod(r3, g, h, f);  // uses FFT, but slower

   // compare the results...

   if (r1 != r2) {
      printf("999999999999999 ");
      print_flag();
      return 0;
   }
   else if (r1 != r3) {
      printf("999999999999999 ");
      print_flag();
      return 0;
   }

   double t;
   long i, j;
   long iter;

   const int nprimes = 30;
   const long L = 12; 
   const long N = 1L << L;
   long r;
   

   for (r = 0; r < nprimes; r++) UseFFTPrime(r);

   vec_long aa[nprimes], AA[nprimes];

   for (r = 0; r < nprimes; r++) {
      aa[r].SetLength(N);
      AA[r].SetLength(N);

      for (i = 0; i < N; i++)
         aa[r][i] = RandomBnd(GetFFTPrime(r));


      FFTFwd(AA[r].elts(), aa[r].elts(), L, r);
      FFTRev1(AA[r].elts(), AA[r].elts(), L, r);
   }

   iter = 1;

   do {
     t = GetTime();
     for (j = 0; j < iter; j++) {
        for (r = 0; r < nprimes; r++) {
           long *AAp = AA[r].elts();
           long *aap = aa[r].elts();
           long q = GetFFTPrime(r);
           mulmod_t qinv = GetFFTPrimeInv(r);

           FFTFwd(AAp, aap, L, r);
           FFTRev1(AAp, aap, L, r);
           for (i = 0; i < N; i++) AAp[i] = NormalizedMulMod(AAp[i], aap[i], q, qinv);
        }
     }
     t = GetTime() - t;
     iter = 2*iter;
   } while(t < 1);

   iter = iter/2;

   iter = long((1.5/t)*iter) + 1;


   double tvec[5];
   long w;

   for (w = 0; w < 5; w++) {
     t = GetTime();
     for (j = 0; j < iter; j++) {
        for (r = 0; r < nprimes; r++) {
           long *AAp = AA[r].elts();
           long *aap = aa[r].elts();
           long q = GetFFTPrime(r);
           mulmod_t qinv = GetFFTPrimeInv(r);

           FFTFwd(AAp, aap, L, r);
           FFTRev1(AAp, aap, L, r);
           for (i = 0; i < N; i++) AAp[i] = NormalizedMulMod(AAp[i], aap[i], q, qinv);
        }
     }
     t = GetTime() - t;
     tvec[w] = t;
   }

   t = clean_data(tvec);

   t = floor((t/iter)*1e13);

   if (t < 0 || t >= 1e15)
      printf("999999999999999 ");
   else
      printf("%015.0f ", t);

   printf(" [%ld] ", iter);

   print_flag();

   return 0;
}