Exemple #1
0
void sub(ZZX& x, const ZZ& b, const ZZX& a)
{
   long n = a.rep.length();
   if (n == 0) {
      conv(x, b);
   }
   else if (x.rep.MaxLength() == 0) {
      negate(x, a);
      add(x.rep[0], a.rep[0], b);
      x.normalize();
   }
   else {
      // ugly...b could alias a coeff of x

      ZZ *xp = x.rep.elts();
      sub(xp[0], b, a.rep[0]);
      x.rep.SetLength(n);
      xp = x.rep.elts();
      const ZZ *ap = a.rep.elts();
      long i;
      for (i = 1; i < n; i++)
         negate(xp[i], ap[i]);
      x.normalize();
   }
}
Exemple #2
0
void sub(ZZX& x, const ZZX& a, const ZZ& b)
{
   long n = a.rep.length();
   if (n == 0) {
      conv(x, b);
      negate(x, x);
   }
   else if (&x == &a) {
      sub(x.rep[0], a.rep[0], b);
      x.normalize();
   }
   else if (x.rep.MaxLength() == 0) {
      x = a;
      sub(x.rep[0], a.rep[0], b);
      x.normalize();
   }
   else {
      // ugly...b could alias a coeff of x

      ZZ *xp = x.rep.elts();
      sub(xp[0], a.rep[0], b);
      x.rep.SetLength(n);
      xp = x.rep.elts();
      const ZZ *ap = a.rep.elts();
      long i;
      for (i = 1; i < n; i++)
         xp[i] = ap[i];
      x.normalize();
   }
}
Exemple #3
0
void SingleCRT::toPoly(ZZX& poly, const IndexSet& s) const
{
  IndexSet s1 = map.getIndexSet() & s;

  if (card(s1) == 0) {
    clear(poly);
    return;
  }

  ZZ p = to_ZZ(context.ithPrime(s1.first()));  // the first modulus

  poly = map[s1.first()];  // Get poly modulo the first prime

  vec_ZZ& vp = poly.rep;

  // ensure that coeficient vector is of size phi(m) with entries in [-p/2,p/2]
  long phim = context.zMstar.phiM();
  long vpLength = vp.length();
  if (vpLength<phim) { // just in case of leading zeros in poly
    vp.SetLength(phim);
    for (long j=vpLength; j<phim; j++) vp[j]=0;
  }
  ZZ p_over_2 = p/2;
  for (long j=0; j<phim; j++) if (vp[j] > p_over_2) vp[j] -= p;

  // do incremental integer CRT for other levels  
  for (long i = s1.next(s1.first()); i <= s1.last(); i = s1.next(i)) {
    long q = context.ithPrime(i);       // the next modulus

    // CRT the coefficient vectors of poly and current
    intVecCRT(vp, p, map[i].rep, q);    // defined in the module NumbTh
    p *= q;     // update the modulus
  }
  poly.normalize(); // need to call this after we work on the coeffs
}
Exemple #4
0
void sub(ZZX& x, const ZZX& a, const ZZX& b)
{
   long da = deg(a);
   long db = deg(b);
   long minab = min(da, db);
   long maxab = max(da, db);
   x.rep.SetLength(maxab+1);

   long i;
   const ZZ *ap, *bp;
   ZZ* xp;

   for (i = minab+1, ap = a.rep.elts(), bp = b.rep.elts(), xp = x.rep.elts();
        i; i--, ap++, bp++, xp++)
      sub(*xp, (*ap), (*bp));

   if (da > minab && &x != &a)
      for (i = da-minab; i; i--, xp++, ap++)
         *xp = *ap;
   else if (db > minab)
      for (i = db-minab; i; i--, xp++, bp++)
         negate(*xp, *bp);
   else
      x.normalize();

}
Exemple #5
0
void sampleGaussian(ZZX &poly, long n, double stdev)
{
  static double const Pi=4.0*atan(1.0); // Pi=3.1415..
  static long const bignum = 0xfffffff;
  // THREADS: C++11 guarantees these are initialized only once

  if (n<=0) n=deg(poly)+1; if (n<=0) return;
  poly.SetMaxLength(n); // allocate space for degree-(n-1) polynomial
  for (long i=0; i<n; i++) SetCoeff(poly, i, ZZ::zero());

  // Uses the Box-Muller method to get two Normal(0,stdev^2) variables
  for (long i=0; i<n; i+=2) {
    double r1 = (1+RandomBnd(bignum))/((double)bignum+1);
    double r2 = (1+RandomBnd(bignum))/((double)bignum+1);
    double theta=2*Pi*r1;
    double rr= sqrt(-2.0*log(r2))*stdev;

    assert(rr < 8*stdev); // sanity-check, no more than 8 standard deviations

    // Generate two Gaussians RV's, rounded to integers
    long x = (long) floor(rr*cos(theta) +0.5);
    SetCoeff(poly, i, x);
    if (i+1 < n) {
      x = (long) floor(rr*sin(theta) +0.5);
      SetCoeff(poly, i+1, x);
    }
  }
  poly.normalize(); // need to call this after we work on the coeffs
}
Exemple #6
0
void KarSqr(ZZX& c, const ZZX& a)
{
   if (IsZero(a)) {
      clear(c);
      return;
   }

   vec_ZZ mem;

   const ZZ *ap;
   ZZ *cp;

   long sa = a.rep.length();

   if (&a == &c) {
      mem = a.rep;
      ap = mem.elts();
   }
   else
      ap = a.rep.elts();

   c.rep.SetLength(sa+sa-1);
   cp = c.rep.elts();

   long maxa, xover;

   maxa = MaxBits(a);

   xover = 2;

   if (sa < xover)
      PlainSqr(cp, ap, sa);
   else {
      /* karatsuba */

      long n, hn, sp, depth;

      n = sa;
      sp = 0;
      depth = 0;
      do {
         hn = (n+1) >> 1;
         sp += hn+hn+hn - 1;
         n = hn;
         depth++;
      } while (n >= xover);

      ZZVec stk;
      stk.SetSize(sp,
         ((2*maxa + NumBits(sa) + 2*depth + 10)
          + NTL_ZZ_NBITS-1)/NTL_ZZ_NBITS);

      KarSqr(cp, ap, sa, stk.elts());
   }

   c.normalize();
}
Exemple #7
0
void PlainMul(ZZX& x, const ZZX& a, const ZZX& b)
{
   if (&a == &b) {
      PlainSqr(x, a);
      return;
   }

   long da = deg(a);
   long db = deg(b);

   if (da < 0 || db < 0) {
      clear(x);
      return;
   }

   long d = da+db;



   const ZZ *ap, *bp;
   ZZ *xp;

   ZZX la, lb;

   if (&x == &a) {
      la = a;
      ap = la.rep.elts();
   }
   else
      ap = a.rep.elts();

   if (&x == &b) {
      lb = b;
      bp = lb.rep.elts();
   }
   else
      bp = b.rep.elts();

   x.rep.SetLength(d+1);

   xp = x.rep.elts();

   long i, j, jmin, jmax;
   ZZ t, accum;

   for (i = 0; i <= d; i++) {
      jmin = max(0, i-db);
      jmax = min(da, i);
      clear(accum);
      for (j = jmin; j <= jmax; j++) {
	 mul(t, ap[j], bp[i-j]);
	 add(accum, accum, t);
      }
      xp[i] = accum;
   }
   x.normalize();
}
Exemple #8
0
void PlainSqr(ZZX& x, const ZZX& a)
{
   long da = deg(a);

   if (da < 0) {
      clear(x);
      return;
   }

   long d = 2*da;

   const ZZ *ap;
   ZZ *xp;

   ZZX la;

   if (&x == &a) {
      la = a;
      ap = la.rep.elts();
   }
   else
      ap = a.rep.elts();


   x.rep.SetLength(d+1);

   xp = x.rep.elts();

   long i, j, jmin, jmax;
   long m, m2;
   ZZ t, accum;

   for (i = 0; i <= d; i++) {
      jmin = max(0, i-da);
      jmax = min(da, i);
      m = jmax - jmin + 1;
      m2 = m >> 1;
      jmax = jmin + m2 - 1;
      clear(accum);
      for (j = jmin; j <= jmax; j++) {
	 mul(t, ap[j], ap[i-j]);
	 add(accum, accum, t);
      }
      add(accum, accum, accum);
      if (m & 1) {
	 sqr(t, ap[jmax + 1]);
	 add(accum, accum, t);
      }

      xp[i] = accum;
   }

   x.normalize();
}
Exemple #9
0
void add(ZZX& x, const ZZX& a, long b)
{
   if (a.rep.length() == 0) {
      conv(x, b);
   }
   else {
      if (&x != &a) x = a;
      add(x.rep[0], x.rep[0], b);
      x.normalize();
   }
}
Exemple #10
0
void sampleSmall(ZZX &poly, long n)
{
  if (n<=0) n=deg(poly)+1; if (n<=0) return;
  poly.SetMaxLength(n); // allocate space for degree-(n-1) polynomial

  for (long i=0; i<n; i++) {    // Chosse coefficients, one by one
    long u = lrand48();
    if (u&1) {                 // with prob. 1/2 choose between -1 and +1
      u = (u & 2) -1;
      SetCoeff(poly, i, u);
    }
    else SetCoeff(poly, i, 0); // with ptob. 1/2 set to 0
  }
  poly.normalize(); // need to call this after we work on the coeffs
}
Exemple #11
0
void sub(ZZX& x, const ZZX& a, long b)
{
   if (b == 0) {
      x = a;
      return;
   }

   if (a.rep.length() == 0) {
      x.rep.SetLength(1);
      conv(x.rep[0], b);
      negate(x.rep[0], x.rep[0]);
   }
   else {
      if (&x != &a) x = a;
      sub(x.rep[0], x.rep[0], b);
   }
   x.normalize();
}
Exemple #12
0
void sampleHWt(ZZX &poly, long Hwt, long n)
{
  if (n<=0) n=deg(poly)+1; if (n<=0) return;
  clear(poly);          // initialize to zero
  poly.SetMaxLength(n); // allocate space for degree-(n-1) polynomial

  long b,u,i=0;
  if (Hwt>n) Hwt=n;
  while (i<Hwt) {  // continue until exactly Hwt nonzero coefficients
    u=lrand48()%n; // The next coefficient to choose
    if (IsZero(coeff(poly,u))) { // if we didn't choose it already
      b = lrand48()&2; // b random in {0,2}
      b--;             //   random in {-1,1}
      SetCoeff(poly,u,b);

      i++; // count another nonzero coefficient
    }
  }
  poly.normalize(); // need to call this after we work on the coeffs
}
Exemple #13
0
void SetCoeff(ZZX& x, long i)
{
   long j, m;

   if (i < 0)
      Error("coefficient index out of range");

   if (NTL_OVERFLOW(i, 1, 0))
      Error("overflow in SetCoeff");

   m = deg(x);

   if (i > m) {
      x.rep.SetLength(i+1);
      for (j = m+1; j < i; j++)
         clear(x.rep[j]);
   }
   set(x.rep[i]);
   x.normalize();
}
Exemple #14
0
void sampleUniform(ZZX& poly, const ZZ& B, long n)
{
  if (n<=0) n=deg(poly)+1; if (n<=0) return;
  if (B <= 0) {
    clear(poly);
    return;
  }

  poly.SetMaxLength(n); // allocate space for degree-(n-1) polynomial

  ZZ UB, tmp;

  UB =  2*B + 1;
  for (long i = 0; i < n; i++) {
    RandomBnd(tmp, UB);
    tmp -= B; 
    poly.rep[i] = tmp;
  }

  poly.normalize();
}
Exemple #15
0
//FIXME: both the reduction from powerful to the individual primes and
//  the CRT back to poly can be made more efficient
void PowerfulDCRT::powerfulToZZX(ZZX& poly, const Vec<ZZ>& powerful,
				 IndexSet set) const
{
  zz_pBak bak; bak.save(); // backup NTL's current modulus

  if (empty(set)) set = IndexSet(0, pConvVec.length()-1);

  clear(poly);
  //  poly.SetLength(powerful.length());
  ZZ product = conv<ZZ>(1L);
  for (long i = set.first(); i <= set.last(); i = set.next(i)) {
    pConvVec[i].restoreModulus();
    //    long newPrime = zz_p::modulus();

    HyperCube<zz_p> oneRowPwrfl(indexes.shortSig);
    conv(oneRowPwrfl.getData(), powerful); // reduce and convert to Vec<zz_p>

    zz_pX oneRowPoly;
    pConvVec[i].powerfulToPoly(oneRowPoly, oneRowPwrfl);
    CRT(poly, product, oneRowPoly);                   // NTL :-)
  }
  poly.normalize();
}
Exemple #16
0
void SetCoeff(ZZX& x, long i, const ZZ& a)
{
   long j, m;

   if (i < 0)
      Error("SetCoeff: negative index");

   if (NTL_OVERFLOW(i, 1, 0))
      Error("overflow in SetCoeff");

   m = deg(x);

   if (i > m && IsZero(a)) return;

   if (i > m) {
      /* careful: a may alias a coefficient of x */

      long alloc = x.rep.allocated();

      if (alloc > 0 && i >= alloc) {
         ZZ aa = a;
         x.rep.SetLength(i+1);
         x.rep[i] = aa;
      }
      else {
         x.rep.SetLength(i+1);
         x.rep[i] = a;
      }

      for (j = m+1; j < i; j++)
         clear(x.rep[j]);
   }
   else
      x.rep[i] = a;

   x.normalize();
}
Exemple #17
0
// bootstrap a ciphertext to reduce noise
void FHEPubKey::reCrypt(Ctxt &ctxt)
{
  FHE_TIMER_START;

  // Some sanity checks for dummy ciphertext
  long ptxtSpace = ctxt.getPtxtSpace();
  if (ctxt.isEmpty()) return;
  if (ctxt.parts.size()==1 && ctxt.parts[0].skHandle.isOne()) {
    // Dummy encryption, just ensure that it is reduced mod p
    ZZX poly = to_ZZX(ctxt.parts[0]);
    for (long i=0; i<poly.rep.length(); i++)
      poly[i] = to_ZZ( rem(poly[i],ptxtSpace) );
    poly.normalize();
    ctxt.DummyEncrypt(poly);
    return;
  }

  assert(recryptKeyID>=0); // check that we have bootstrapping data

  long p = getContext().zMStar.getP();
  long r = getContext().alMod.getR();
  long p2r = getContext().alMod.getPPowR();

  // the bootstrapping key is encrypted relative to plaintext space p^{e-e'+r}.
  long e = getContext().rcData.e;
  long ePrime = getContext().rcData.ePrime;
  long p2ePrime = power_long(p,ePrime);
  long q = power_long(p,e)+1;
  assert(e>=r);

#ifdef DEBUG_PRINTOUT
  cerr << "reCrypt: p="<<p<<", r="<<r<<", e="<<e<<" ePrime="<<ePrime
       << ", q="<<q<<endl;
#endif

  // can only bootstrap ciphertext with plaintext-space dividing p^r
  assert(p2r % ptxtSpace == 0);

  FHE_NTIMER_START(preProcess);

  // Make sure that this ciphertxt is in canonical form
  if (!ctxt.inCanonicalForm()) ctxt.reLinearize();

  // Mod-switch down if needed
  IndexSet s = ctxt.getPrimeSet() / getContext().specialPrimes; // set minus
  if (s.card()>2) { // leave only bottom two primes
    long frst = s.first();
    long scnd = s.next(frst);
    IndexSet s2(frst,scnd);
    s.retain(s2); // retain only first two primes
  }
  ctxt.modDownToSet(s);

  // key-switch to the bootstrapping key
  ctxt.reLinearize(recryptKeyID);

  // "raw mod-switch" to the bootstrapping mosulus q=p^e+1.
  vector<ZZX> zzParts; // the mod-switched parts, in ZZX format
  double noise = ctxt.rawModSwitch(zzParts, q);
  noise = sqrt(noise);

  // Add multiples of p2r and q to make the zzParts divisible by p^{e'}
  long maxU=0;
  for (long i=0; i<(long)zzParts.size(); i++) {
    // make divisible by p^{e'}
    long newMax = makeDivisible(zzParts[i].rep, p2ePrime, p2r, q,
				getContext().rcData.alpha);
    zzParts[i].normalize();   // normalize after working directly on the rep
    if (maxU < newMax)  maxU = newMax;
  }

  // Check that the estimated noise is still low
  if (noise + maxU*p2r*(skHwts[recryptKeyID]+1) > q/2) 
    cerr << " * noise/q after makeDivisible = "
	 << ((noise + maxU*p2r*(skHwts[recryptKeyID]+1))/q) << endl;

  for (long i=0; i<(long)zzParts.size(); i++)
    zzParts[i] /= p2ePrime;   // divide by p^{e'}

  // Multiply the post-processed cipehrtext by the encrypted sKey
#ifdef DEBUG_PRINTOUT
  cerr << "+ Before recryption ";
  decryptAndPrint(cerr, recryptEkey, *dbgKey, *dbgEa, printFlag);
#endif

  double p0size = to_double(coeffsL2Norm(zzParts[0]));
  double p1size = to_double(coeffsL2Norm(zzParts[1]));
  ctxt = recryptEkey;
  ctxt.multByConstant(zzParts[1], p1size*p1size);
  ctxt.addConstant(zzParts[0], p0size*p0size);

#ifdef DEBUG_PRINTOUT
  cerr << "+ Before linearTrans1 ";
  decryptAndPrint(cerr, ctxt, *dbgKey, *dbgEa, printFlag);
#endif
  FHE_NTIMER_STOP(preProcess);

  // Move the powerful-basis coefficients to the plaintext slots
  FHE_NTIMER_START(LinearTransform1);
  ctxt.getContext().rcData.firstMap->apply(ctxt);
  FHE_NTIMER_STOP(LinearTransform1);

#ifdef DEBUG_PRINTOUT
  cerr << "+ After linearTrans1 ";
  decryptAndPrint(cerr, ctxt, *dbgKey, *dbgEa, printFlag);
#endif

  // Extract the digits e-e'+r-1,...,e-e' (from fully packed slots)
  extractDigitsPacked(ctxt, e-ePrime, r, ePrime,
		      context.rcData.unpackSlotEncoding);

#ifdef DEBUG_PRINTOUT
  cerr << "+ Before linearTrans2 ";
  decryptAndPrint(cerr, ctxt, *dbgKey, *dbgEa, printFlag);
#endif

  // Move the slots back to powerful-basis coefficients
  FHE_NTIMER_START(LinearTransform2);
  ctxt.getContext().rcData.secondMap->apply(ctxt);
  FHE_NTIMER_STOP(LinearTransform2);
}
Exemple #18
0
void conv(ZZX& x, const vec_ZZ& a)
{
   x.rep = a;
   x.normalize();
}
Exemple #19
0
void conv(ZZX& x, const ZZ_pX& a)
{
   conv(x.rep, a.rep);
   x.normalize();
}
Exemple #20
0
// Main entry point: Evaluate a cleartext polynomial on an encrypted input
void polyEval(Ctxt& ret, ZZX poly, const Ctxt& x, long k)
     // Note: poly is passed by value, so caller keeps the original
{
  if (deg(poly)<=2) {  // nothing to optimize here
    if (deg(poly)<1) { // A constant
      ret.clear();
      ret.addConstant(coeff(poly, 0));
    } else {           // A linear or quadratic polynomial
      DynamicCtxtPowers babyStep(x, deg(poly));
      simplePolyEval(ret, poly, babyStep);
    }
    return;
  }

  // How many baby steps: set k~sqrt(n/2), rounded up/down to a power of two

  // FIXME: There may be some room for optimization here: it may be possible
  // to choose k as something other than a power of two and still maintain
  // optimal depth, in principle we can try all possible values of k between
  // two consecutive powers of two and choose the one that gives the least
  // number of multiplies, conditioned on minimum depth.

  if (k<=0) {
    long kk = (long) sqrt(deg(poly)/2.0);
    k = 1L << NextPowerOfTwo(kk);

    // heuristic: if k>>kk then use a smaler power of two
    if ((k==16 && deg(poly)>167) || (k>16 && k>(1.44*kk)))
      k /= 2;
  }
#ifdef DEBUG_PRINTOUT
  cerr << "  k="<<k;
#endif

  long n = divc(deg(poly),k);      // n = ceil(deg(p)/k), deg(p) >= k*n
  DynamicCtxtPowers babyStep(x, k);
  const Ctxt& x2k = babyStep.getPower(k);

  // Special case when deg(p)>k*(2^e -1)
  if (n==(1L << NextPowerOfTwo(n))) { // n is a power of two
    DynamicCtxtPowers giantStep(x2k, n/2);
    degPowerOfTwo(ret, poly, k, babyStep, giantStep);
    return;
  }

  // If n is not a power of two, ensure that poly is monic and that
  // its degree is divisible by k, then call the recursive procedure

  const ZZ p = to_ZZ(x.getPtxtSpace());
  ZZ top = LeadCoeff(poly);
  ZZ topInv; // the inverse mod p of the top coefficient of poly (if any)
  bool divisible = (n*k == deg(poly)); // is the degree divisible by k?
  long nonInvertibe = InvModStatus(topInv, top, p);
       // 0 if invertible, 1 if not

  // FIXME: There may be some room for optimization below: instead of
  // adding a term X^{n*k} we can add X^{n'*k} for some n'>n, so long
  // as n' is smaller than the next power of two. We could save a few
  // multiplications since giantStep[n'] may be easier to compute than
  // giantStep[n] when n' has fewer 1's than n in its binary expansion.

  ZZ extra = ZZ::zero();    // extra!=0 denotes an added term extra*X^{n*k}
  if (!divisible || nonInvertibe) {  // need to add a term
    top = to_ZZ(1);  // new top coefficient is one
    topInv = top;    // also the new inverse is one
    // set extra = 1 - current-coeff-of-X^{n*k}
    extra = SubMod(top, coeff(poly,n*k), p);
    SetCoeff(poly, n*k); // set the top coefficient of X^{n*k} to one
  }

  long t = IsZero(extra)? divc(n,2) : n;
  DynamicCtxtPowers giantStep(x2k, t);

  if (!IsOne(top)) {
    poly *= topInv; // Multiply by topInv to make into a monic polynomial
    for (long i=0; i<=n*k; i++) rem(poly[i], poly[i], p);
    poly.normalize();
  }
  recursivePolyEval(ret, poly, k, babyStep, giantStep);

  if (!IsOne(top)) {
    ret.multByConstant(top);
  }

  if (!IsZero(extra)) { // if we added a term, now is the time to subtract back
    Ctxt topTerm = giantStep.getPower(n);
    topTerm.multByConstant(extra);
    ret -= topTerm;
  }
}
Exemple #21
0
void KarMul(ZZX& c, const ZZX& a, const ZZX& b)
{
   if (IsZero(a) || IsZero(b)) {
      clear(c);
      return;
   }

   if (&a == &b) {
      KarSqr(c, a);
      return;
   }

   vec_ZZ mem;

   const ZZ *ap, *bp;
   ZZ *cp;

   long sa = a.rep.length();
   long sb = b.rep.length();

   if (&a == &c) {
      mem = a.rep;
      ap = mem.elts();
   }
   else
      ap = a.rep.elts();

   if (&b == &c) {
      mem = b.rep;
      bp = mem.elts();
   }
   else
      bp = b.rep.elts();

   c.rep.SetLength(sa+sb-1);
   cp = c.rep.elts();

   long maxa, maxb, xover;

   maxa = MaxBits(a);
   maxb = MaxBits(b);
   xover = 2;

   if (sa < xover || sb < xover)
      PlainMul(cp, ap, sa, bp, sb);
   else {
      /* karatsuba */

      long n, hn, sp, depth;

      n = max(sa, sb);
      sp = 0;
      depth = 0;
      do {
         hn = (n+1) >> 1;
         sp += (hn << 2) - 1;
         n = hn;
         depth++;
      } while (n >= xover);

      ZZVec stk;
      stk.SetSize(sp,
         ((maxa + maxb + NumBits(min(sa, sb)) + 2*depth + 10)
          + NTL_ZZ_NBITS-1)/NTL_ZZ_NBITS);

      KarMul(cp, ap, sa, bp, sb, stk.elts());
   }

   c.normalize();
}
Exemple #22
0
// Note: poly is passed by value, not by reference, so the calling routine
// keeps its original polynomial
long evalPolyTopLevel(ZZX poly, long x, long p, long k=0)
{
  if (verbose)
  cerr << "\n* evalPolyTopLevel: p="<<p<<", x="<<x<<", poly="<<poly;

  if (deg(poly)<=2) { // nothing to optimize here
    if (deg(poly)<1) return to_long(coeff(poly, 0));
    DynamicPtxtPowers babyStep(x, p, deg(poly));
    long ret = simplePolyEval(poly, babyStep, p);
    totalDepth = babyStep.getDepth(deg(poly));
    return ret;
  }

  // How many baby steps: set k~sqrt(n/2), rounded up/down to a power of two

  // FIXME: There may be some room for optimization here: it may be possible
  // to choose k as something other than a power of two and still maintain
  // optimal depth, in principle we can try all possible values of k between
  // the two powers of two and choose the one that goves the least number
  // of multiplies, conditioned on minimum depth.

  if (k<=0) {
    long kk = (long) sqrt(deg(poly)/2.0);
    k = 1L << NextPowerOfTwo(kk);

    // heuristic: if k>>kk then use a smaler power of two
    if ((k==16 && deg(poly)>167) || (k>16 && k>(1.44*kk)))
      k /= 2;
  }
  cerr << ", k="<<k;

  long n = divc(deg(poly),k);          // deg(p) = k*n +delta
  if (verbose) cerr << ", n="<<n<<endl;

  DynamicPtxtPowers babyStep(x, p, k);
  long x2k = babyStep.getPower(k);

  // Special case when deg(p)>k*(2^e -1)
  if (n==(1L << NextPowerOfTwo(n))) { // n is a power of two
    DynamicPtxtPowers giantStep(x2k, p, n/2, babyStep.getDepth(k));
    if (verbose)
      cerr << "babyStep="<<babyStep<<", giantStep="<<giantStep<<endl;
    long ret = degPowerOfTwo(poly, k, babyStep, giantStep, p, totalDepth);

    if (verbose) {
      cerr << "  degPowerOfTwo("<<poly<<") returns "<<ret<<", depth="<<totalDepth<<endl;
      if (ret != polyEvalMod(poly,babyStep[0], p)) {
	cerr << "  ## recursive call failed, ret="<<ret<<"!=" 
	     << polyEvalMod(poly,babyStep[0], p)<<endl;
	exit(0);
      }
      // cerr << "  babyStep depth=[";
      // for (long i=0; i<babyStep.size(); i++) 
      // 	cerr << babyStep.getDepth(i+1)<<" ";
      // cerr << "]\n";
      // cerr << "  giantStep depth=[";
      // for (long i=0; i<giantStep.size(); i++)
      // 	cerr<<giantStep.getDepth(i+1)<<" ";
      // cerr << "]\n";
    }
    return ret;
  }

  // If n is not a power of two, ensure that poly is monic and that
  // its degree is divisible by k, then call the recursive procedure

  ZZ topInv; // the inverse mod p of the top coefficient of poly (if any)
  bool divisible = (n*k == deg(poly)); // is the degree divisible by k?
  long nonInvertibe = InvModStatus(topInv, LeadCoeff(poly), to_ZZ(p));
       // 0 if invertible, 1 if not

  // FIXME: There may be some room for optimization below: instead of
  // adding a term X^{n*k} we can add X^{n'*k} for some n'>n, so long
  // as n' is smaller than the next power of two. We could save a few
  // multiplications since giantStep[n'] may be easier to compute than
  // giantStep[n] when n' has fewer 1's than n in its binary expansion.

  long extra = 0;        // extra!=0 denotes an added term extra*X^{n*k}
  if (!divisible || nonInvertibe) {  // need to add a term
    // set extra = 1 - current-coeff-of-X^{n*k}
    extra = SubMod(1, to_long(coeff(poly,n*k)), p);
    SetCoeff(poly, n*k); // set the top coefficient of X^{n*k} to one
    topInv = to_ZZ(1);   // inverse of new top coefficient is one
  }

  long t = (extra==0)? divc(n,2) : n;
  DynamicPtxtPowers giantStep(x2k, p, t, babyStep.getDepth(k));

  if (verbose)
    cerr << "babyStep="<<babyStep<<", giantStep="<<giantStep<<endl;

  long y; // the value to return
  long subDepth1 =0;
  if (!IsOne(topInv)) {
    long top = to_long(poly[n*k]); // record the current top coefficient
    //    cerr << ", top-coeff="<<top;

    // Multiply by topInv modulo p to make into a monic polynomial
    poly *= topInv;
    for (long i=0; i<=n*k; i++) rem(poly[i], poly[i], to_ZZ(p));
    poly.normalize();

    y = recursivePolyEval(poly, k, babyStep, giantStep, p, subDepth1);
    if (verbose) {
      cerr << "  recursivePolyEval("<<poly<<") returns "<<y<<", depth="<<subDepth1<<endl;
      if (y != polyEvalMod(poly,babyStep[0], p)) {
	cerr << "## recursive call failed, ret="<<y<<"!=" 
	     << polyEvalMod(poly,babyStep[0], p)<<endl;
	exit(0);
      }
    }
    y = MulMod(y, top, p); // multiply by the original top coefficient
  }
  else {
    y = recursivePolyEval(poly, k, babyStep, giantStep, p, subDepth1);
    if (verbose) {
      cerr << "  recursivePolyEval("<<poly<<") returns "<<y<<", depth="<<subDepth1<<endl;
      if (y != polyEvalMod(poly,babyStep[0], p)) {
	cerr << "## recursive call failed, ret="<<y<<"!=" 
	     << polyEvalMod(poly,babyStep[0], p)<<endl;
	exit(0);
      }
    }
  }

  if (extra != 0) { // if we added a term, now is the time to subtract back
    if (verbose) cerr << ", subtracting "<<extra<<"*X^"<<k*n;
    extra = MulMod(extra, giantStep.getPower(n), p);
    totalDepth = max(subDepth1, giantStep.getDepth(n));
    y = SubMod(y, extra, p);
  }
  else totalDepth = subDepth1;
  if (verbose) cerr << endl;
  return y;
}