/*
 * Picks the texture dimensions to allocate for a requested size.
 *
 * If the GL context reports that it can upload non-power-of-two
 * textures, aSize is handed back untouched. Otherwise the width and the
 * height are each passed through NextPowerOfTwo() independently.
 */
static IntSize
CalculatePOTSize(const IntSize& aSize, GLContext* gl)
{
  const bool mustPad = !gl->CanUploadNonPowerOfTwo();
  if (mustPad) {
    return IntSize(NextPowerOfTwo(aSize.width), NextPowerOfTwo(aSize.height));
  }

  return aSize;
}
예제 #2
0
// ------------------------------------------------------------------------
// Builds an interpreter whose core memory holds the built-in core words,
// a single space, and then the caller's program text.
//
// program: NUL-terminated source text; a null pointer is treated as "".
S4::Interpreter::Interpreter(const char *program) {
    static const char *coreWords = "42 .";

    dictionary = new Dictionary();

    program = program ? program : "";

    // Room for "<coreWords> <program>" plus the terminating NUL. The NUL is
    // counted explicitly in case NextPowerOfTwo() returns its argument
    // unchanged when that argument is already a power of two.
    sizeOfCore = NextPowerOfTwo(std::strlen(coreWords) + 1 + std::strlen(program) + 1);
    core = new char[sizeOfCore];
    endOfCore = core + sizeOfCore;
    *(endOfCore - 1) = 0;

    // load primitives
    AddPrimitive(new S4::Primitive::Add());
    AddPrimitive(new S4::Primitive::CloseBrace());
    AddPrimitive(new S4::Primitive::Dot());
    AddPrimitive(new S4::Primitive::Dup());
    AddPrimitive(new S4::Primitive::OpenBrace());
    AddPrimitive(new S4::Primitive::StackHeight());

    // Put core words at the beginning of memory and the program after it.
    // The text is written into the buffer allocated above; re-pointing
    // `core` at a separately built string would leak that buffer and leave
    // endOfCore/sizeOfCore describing memory `core` no longer refers to.
    std::strcpy(core, coreWords);
    std::strcat(core, " ");
    std::strcat(core, program);

    printf(".core:\t%s\n", core);
    // and initialize the program counter
    programCounter = core;
}
예제 #3
0
파일: Buffer.cpp 프로젝트: crnt/scaliendb
// Grows the buffer so that it can hold at least size_ bytes.
//
// size_:   requested capacity, rounded up with NextPowerOfTwo(). Requests
//          that do not exceed the current capacity are ignored.
// keepold: when true, the first `length` bytes of the old contents are
//          carried over into the new storage.
void Buffer::Allocate(unsigned size_, bool keepold)
{
    char*       newbuffer;
    bool        fresh;
        
    if (size_ <= size)
        return;

    size_ = NextPowerOfTwo(size_);

    // Storage that is the `array` member or is flagged `preallocated` is
    // never handed to realloc(); a fresh malloc() block replaces it.
    fresh = (buffer == array || preallocated);
    if (fresh)
        newbuffer = (char*) malloc(size_);
    else
        newbuffer = (char*) realloc(buffer, size_);

    ASSERT(newbuffer != NULL);

    // realloc() carries the old bytes over by itself; a fresh malloc() block
    // does not. Copy in every malloc() case: copying only when
    // buffer == array dropped the contents of a preallocated buffer.
    if (keepold && length > 0 && fresh)
        memcpy(newbuffer, buffer, length);
    
    buffer = newbuffer;
    size = size_;
    preallocated = false;
}
예제 #4
0
void Buffer::Allocate(unsigned size_, bool keepold)
{
    char*       newbuffer;
        
    if (size_ <= size)
        return;

    size_ = NextPowerOfTwo(size_);

    if (buffer == array || preallocated)
        newbuffer = (char*) malloc(size_);
    else
        newbuffer = (char*) realloc(buffer, size_);

    if (newbuffer == NULL)
    {
        throw std::bad_alloc();
        // in case of exceptions are disabled
        STOP_FAIL(1, "Out of memory error");
    }

    if (keepold && length > 0)
    {
        if (buffer == array || preallocated)
            memcpy(newbuffer, buffer, length);
    }
    
    buffer = newbuffer;
    size = size_;
    preallocated = false;
}
예제 #5
0
void	CARingBuffer::Allocate(int nChannels, UInt32 bytesPerFrame, UInt32 capacityFrames)
{
	Deallocate();
	
	capacityFrames = NextPowerOfTwo(capacityFrames);
	
	mNumberChannels = nChannels;
	mBytesPerFrame = bytesPerFrame;
	mCapacityFrames = capacityFrames;
	mCapacityFramesMask = capacityFrames - 1;
	mCapacityBytes = bytesPerFrame * capacityFrames;

	// put everything in one memory allocation, first the pointers, then the deinterleaved channels
	UInt32 allocSize = (mCapacityBytes + sizeof(Byte *)) * nChannels;
	Byte *p = (Byte *)CA_malloc(allocSize);
	memset(p, 0, allocSize);
	mBuffers = (Byte **)p;
	p += nChannels * sizeof(Byte *);
	for (int i = 0; i < nChannels; ++i) {
		mBuffers[i] = p;
		p += mCapacityBytes;
	}
	
	for (UInt32 i = 0; i<kGeneralRingTimeBoundsQueueSize; ++i)
	{
		mTimeBoundsQueue[i].mStartTime = 0;
		mTimeBoundsQueue[i].mEndTime = 0;
		mTimeBoundsQueue[i].mUpdateCounter = 0;
	}
	mTimeBoundsQueuePtr = 0;
}
예제 #6
0
// Transforms x in place: scales its coefficients by `powers`, multiplies by
// Rb in FFT representation, extracts coefficients n-1..2(n-1) of the
// product, and scales by `powers` again. A zero x is left alone; n<=0
// clears x.
void BluesteinFFT(zz_pX& x, long n, const zz_p& root,
		  const zz_pX& powers, const Vec<mulmod_precon_t>& powers_aux, 
                  const fftRep& Rb)
{
  // FHE_TIMER_START;

  if (IsZero(x)) return;
  if (n<=0) {
    clear(x);
    return;
  }

  const long modulus = zz_p::modulus();

  // x[i] *= powers[i], using the preconditioned multipliers
  for (long i=0, top=deg(x); i<=top; i++) {
    x[i].LoopHole() = MulModPrecon(rep(x[i]), rep(powers[i]), modulus, powers_aux[i]);
  }
  x.normalize();

  const long logSize = NextPowerOfTwo(2*n-1);
  fftRep& Ra = Cmodulus::getScratch_fftRep(logSize);
  TofftRep(Ra, x, logSize);

  mul(Ra,Ra,Rb);           // product in FFT representation

  FromfftRep(x, Ra, n-1, 2*(n-1)); // back to coefficient form

  // x[i] *= powers[i] once more on the extracted coefficients
  for (long i=0, top=deg(x); i<=top; i++) {
    x[i].LoopHole() = MulModPrecon(rep(x[i]), rep(powers[i]), modulus, powers_aux[i]);
  }
  x.normalize();
}
예제 #7
0
// Stores the shadow map size: values below 1 are raised to 1, and the
// result is then passed through NextPowerOfTwo().
void Light::SetShadowMapSize(int size)
{
    const int clamped = (size < 1) ? 1 : size;
    shadowMapSize = NextPowerOfTwo(clamped);
}
예제 #8
0
// Precondition: k*(2^e +1) > deg(poly) > k*(2^e -1), and babyStep holds
// at least k + (deg(poly) mod k) powers.
// Writes poly = r + X^{k(n-1)}*q into ret, with r evaluated through
// PatersonStockmeyer and q through simplePolyEval.
static void
degPowerOfTwo(Ctxt& ret, const ZZX& poly, long k,
	      DynamicCtxtPowers& babyStep, DynamicCtxtPowers& giantStep)
{
  if (deg(poly)<=babyStep.size()) { // small enough: evaluate directly
    simplePolyEval(ret, poly, babyStep);
    return;
  }

  // deg(poly)/k is assumed to be 2^e or 2^e -1; n is 1 << NextPowerOfTwo of it
  const long n = 1L << NextPowerOfTwo(deg(poly)/k);
  const long split = (n-1)*k;

  ZZX r = trunc(poly, split);      // degree <= k(2^e-1)-1
  ZZX q = RightShift(poly, split); // 0 < degree < 2k
  SetCoeff(r, split);              // monic, degree == k(2^e-1)
  q -= 1;

  PatersonStockmeyer(ret, r, k, n/2, 0,	babyStep, giantStep);

  Ctxt tmp(ret.getPubKey(), ret.getPtxtSpace());
  simplePolyEval(tmp, q, babyStep); // evaluate q

  // multiply by the giant-step powers 1, 2, 4, ... below n
  for (long i=1; i<n; i<<=1)
    tmp.multiplyBy(giantStep.getPower(i));

  ret += tmp;
}
예제 #9
0
  // Initializes `queue` from `config`: sets up the lock and condition
  // variable, allocates the ring buffer and the expensive-wait list, clamps
  // the thread count to kMaxBuildThreads, blocks signals on the calling
  // thread, and initializes the per-thread state. Thread index 0 gets state
  // but no started thread; indices 1 and up are started with
  // BuildThreadRoutine.
  void BuildQueueInit(BuildQueue* queue, const BuildQueueConfig* config)
  {
    CHECK(config->m_MaxExpensiveCount > 0 && config->m_MaxExpensiveCount <= config->m_ThreadCount);

    MutexInit(&queue->m_Lock);
    CondInit(&queue->m_WorkAvailable);

    // Compute queue capacity. Allocate space for a power of two number of
    // indices that's at least one larger than the max number of nodes. Because
    // the queue is treated as a ring buffer, we want W=R to mean an empty
    // buffer.
    uint32_t capacity = NextPowerOfTwo(config->m_MaxNodes + 1);

    MemAllocHeap* heap = config->m_Heap;

    queue->m_Queue              = HeapAllocateArray<int32_t>(heap, capacity);
    queue->m_QueueReadIndex     = 0;
    queue->m_QueueWriteIndex    = 0;
    queue->m_QueueCapacity      = capacity;
    queue->m_Config             = *config;
    queue->m_PendingNodeCount   = 0;
    queue->m_FailedNodeCount    = 0;
    queue->m_QuitSignalled      = false;
    queue->m_ExpensiveRunning   = 0;
    queue->m_ExpensiveWaitCount = 0;
    queue->m_ExpensiveWaitList  = HeapAllocateArray<NodeState*>(heap, capacity);

    // Both allocations are required before any thread touches the queue.
    CHECK(queue->m_Queue);
    CHECK(queue->m_ExpensiveWaitList);

    if (queue->m_Config.m_ThreadCount > kMaxBuildThreads)
    {
      Log(kWarning, "too many build threads (%d) - clamping to %d",
          queue->m_Config.m_ThreadCount, kMaxBuildThreads);

      queue->m_Config.m_ThreadCount = kMaxBuildThreads;
    }

    Log(kDebug, "build queue initialized; ring buffer capacity = %u", queue->m_QueueCapacity);

    // Block all signals on the main thread.
    SignalBlockThread(true);
    SignalHandlerSetCondition(&queue->m_WorkAvailable);

    // Create build threads. Iterate over the clamped count stored in the
    // queue's own config copy; the caller's config still holds the unclamped
    // value, which would make the clamp above ineffective and presumably
    // index past m_ThreadState/m_Threads (their sizes are not visible here).
    for (int i = 0, thread_count = queue->m_Config.m_ThreadCount; i < thread_count; ++i)
    {
      ThreadState* thread_state = &queue->m_ThreadState[i];

      ThreadStateInit(thread_state, queue, MB(64), MB(32), i);

      if (i > 0)
      {
        Log(kDebug, "starting build thread %d", i);
        queue->m_Threads[i] = ThreadStart(BuildThreadRoutine, thread_state);
      }
    }
  }
/**
 * \brief Loads a texture from an image file and fits into a GL-compatible size.
 * \param fileName The name of the image file.
 * \param filtering True for linear filtering, false for nearest-neighbor.
 * \param realW Returns the real width of the texture.
 * \param realH Returns the real height of the texture.
 * \return The OpenGL texture ID.
 */
long SDLGL_LoadTextureFromFileBestFit( std::string fileName, bool filtering, unsigned long &realW, unsigned long &realH ) {
  GLuint theTexture;
  SDL_Surface *loadSurface, *theSurface, *convertedSurface;
  Uint32 rmask, gmask, bmask, amask;

  loadSurface = IMG_Load( fileName.c_str() );

  if ( loadSurface ) {
    #if SDL_BYTEORDER == SDL_BIG_ENDIAN
      rmask = 0xff000000;
      gmask = 0x00ff0000;
      bmask = 0x0000ff00;
      amask = 0x000000ff;
    #else
      rmask = 0x000000ff;
      gmask = 0x0000ff00;
      bmask = 0x00ff0000;
      amask = 0xff000000;
    #endif
    theSurface = SDL_CreateRGBSurface( SDL_SWSURFACE | SDL_SRCALPHA, NextPowerOfTwo(loadSurface->w), NextPowerOfTwo(loadSurface->h), 32, rmask, gmask, bmask, amask );
    SDL_FillRect( theSurface, NULL, SDL_MapRGBA( theSurface->format, 0,0,0,255 ) );
    convertedSurface = SDL_ConvertSurface( loadSurface, theSurface->format, SDL_SWSURFACE | SDL_SRCALPHA );

    MoveTexture( convertedSurface, theSurface );
  }
  else
    theSurface = NULL;

  if ( theSurface ) {
    glGenTextures( 1, &theTexture);

    glBindTexture( GL_TEXTURE_2D, theTexture );

    if ( filtering ) {
      glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
      glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
    } else {
      glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
      glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
    }

    realW = loadSurface->w;
    realH = loadSurface->h;
    glTexImage2D( GL_TEXTURE_2D, 0, 4, theSurface->w, theSurface->h, 0, GL_RGBA, GL_UNSIGNED_BYTE, theSurface->pixels );

    SDL_FreeSurface( theSurface );
    SDL_FreeSurface( loadSurface );
    SDL_FreeSurface( convertedSurface );

    return theTexture;
  }
  else
    return 0; // GLspeak for "no texture"
}
예제 #11
0
파일: CModulus.cpp 프로젝트: TomMD/HElib
// Constructor: it is assumed that zms is already set with m>1
// If qq == 0, then the current context is used: q is taken from
// zp::modulus() and the current context is saved rather than rebuilt.
// If rt is zero, a primitive 2m-th root of unity is searched for relative
// to the modulus that is current at that point.
template <class type> Cmod<type>::
Cmod(const PAlgebra &zms, const zz &qq, const zz &rt)
{
  assert(zms.getM()>1);
  bool explicitModulus = true; // cleared below when qq == 0

  if (qq == 0) {
    q = zp::modulus();
    explicitModulus = false;
  }
  else
    q = qq;

  zMStar = &zms;
  root = rt;

  zz mm;
  mm = zms.getM();
  m_inv = InvMod(mm, q); // m^{-1} mod q

  zz_pBak bak; 

  if (explicitModulus) {
    bak.save(); // backup the current modulus
    context = BuildContext(q, NextPowerOfTwo(zms.getM()) + 1);
    context.restore();       // set NTL's current modulus to q
  }
  else
    context.save(); // remember the modulus that is already current

  if (IsZero(root)) { // Find a 2m-th root of unity modulo q, if not given
    zp rtp;
    long e = 2*zms.getM();
    FindPrimitiveRoot(rtp,e); // NTL routine, relative to current modulus
    if (IsZero(rtp)) // sanity check
      Error("Cmod::compRoots(): no 2m'th roots of unity mod q");
    root = rep(rtp);
  }
  rInv = InvMod(root,q); // set rInv = root^{-1} mod q

  // Allocate memory (relative to current modulus that was defined above).
  // These objects will be initialized when anyone calls FFT/iFFT.

  zpx phimx_poly;
  conv(phimx_poly, zms.getPhimX());

  powers  = new zpx();
  Rb      = new fftrep();
  Ra      = new fftrep();
  ipowers = new zpx();
  iRb     = new fftrep();
  phimx   = new zpxModulus(phimx_poly);
  scratch = new zpx();
}
예제 #12
0
// prime power solver
// A is an n x n matrix, we compute its inverse mod p^r. An error is raised
// if A is not invertible mod p. zz_p::modulus() is assumed to be p^r, for
// p prime, r >= 1. Also zz_pE::modulus() is assumed to be initialized.
// The result is written to X. For r > 1 the inverse is first computed
// modulo p and then lifted to p^r.
void ppInvert(mat_zz_pE& X, const mat_zz_pE& A, long p, long r)
{
  if (r == 1) { // use native inversion from NTL
    inv(X, A);    // X = A^{-1}
    return;
  }

  // begin by inverting A modulo p

  // convert to ZZX for a safe translation to mod-p objects
  vector< vector<ZZX> > tmp;
  convert(tmp, A);
  { // open a new block for mod-p computation
  ZZX G;
  convert(G, zz_pE::modulus());
  zz_pBak bak_pr; bak_pr.save(); // backup the mod-p^r moduli
  zz_pEBak bak_prE; bak_prE.save();
  zz_p::init(p);   // Set the mod-p moduli
  zz_pE::init(conv<zz_pX>(G));

  mat_zz_pE A1, Inv1;
  convert(A1, tmp);   // Recover A as a mat_zz_pE object modulo p
  inv(Inv1, A1);      // Inv1 = A^{-1} (mod p)
  convert(tmp, Inv1); // convert to ZZX for translation to a mod-p^r object
  } // mod-p^r moduli restored on destruction of bak_pr and bak_prE
  mat_zz_pE XX;
  convert(XX, tmp); // XX = A^{-1} (mod p)

  // Now lift the solution modulo p^r

  // Compute the "correction factor" Z, s.t. XX*A = I - p*Z (mod p^r)
  long n = A.NumRows();
  const mat_zz_pE I = ident_mat_zz_pE(n); // identity matrix
  mat_zz_pE Z = I - XX*A;

  convert(tmp, Z);  // Convert to ZZX to divide by p
  for (long i=0; i<n; i++) for (long j=0; j<n; j++) tmp[i][j] /= p;
  convert(Z, tmp);  // convert back to a mod-p^r object

  // The inverse of A is ( I+(pZ)+(pZ)^2+...+(pZ)^{r-1} )*XX (mod p^r). We use
  // O(log r) products to compute it as (I+pZ)* (I+(pZ)^2)* (I+(pZ)^4)*...* XX

  long e = NextPowerOfTwo(r); // 2^e is smallest power of two >= r

  Z *= p;                 // = pZ
  mat_zz_pE prod = I + Z; // = I + pZ
  for (long i=1; i<e; i++) {
    sqr(Z, Z);     // = (pZ)^{2^i}
    prod *= (I+Z); // = sum_{j=0}^{2^{i+1}-1} (pZ)^j
  }
  mul(X, prod, XX); // X = A^{-1} mod p^r
  assert(X*A == I); // sanity check of the lifted inverse
}
예제 #13
0
// Returns X^e, computing and storing it on first request.
// v.at() bounds-checks the slot for e.
Ctxt& DynamicCtxtPowers::getPower(long e)
{
  const long idx = e-1;
  if (!v.at(idx).isEmpty()) // already available
    return v[idx];

  // Split e = (e-k) + k with k = 2^(NextPowerOfTwo(e)-1), a power of two
  // smaller than e, and build X^e = X^{e-k} * X^k.
  const long k = 1L<<(NextPowerOfTwo(e)-1);
  v[idx] = getPower(e-k);
  v[idx].multiplyBy(getPower(k));

  // mod-switch down to base level
  v[idx].modDownToLevel(v[idx].findBaseLevel());

  return v[idx];
}
예제 #14
0
// Builds the modulus object for f with parameter m (m > deg(f) is asserted).
// When m - n > 10 and m < 2n, extra FFT representations R0 and R1 are
// precomputed for the special-case logic.
zz_pXModulus1::zz_pXModulus1(long _m, const zz_pX& _f) 
: m(_m), f(_f), n(deg(f))
{
   assert(m > n);

   specialLogic = (m - n > 10 && m < 2*n);
   build(fm, f);

   if (!specialLogic)
      return;

   zz_pX revInv, truncInv, revF;

   LocalCopyReverse(revF, f, 0, n);
   InvTrunc(truncInv, revF, m-n);
   LocalCopyReverse(revInv, truncInv, 0, m-n-1);

   k = NextPowerOfTwo(2*(m-1-n)+1);
   k1 = NextPowerOfTwo(n);

   TofftRep(R0, revInv, k); 
   TofftRep(R1, f, k1);
}
예제 #15
0
// ------------------------------------------------------------------------
// Grows core memory to NextPowerOfTwo(newCoreSize) bytes when that exceeds
// the current size; otherwise does nothing. The old contents are carried
// over with strcpy, i.e. up to and including the first NUL byte.
//
// NOTE(review): pointers into the old core held elsewhere (programCounter,
// presumably) are not rebased here - confirm that callers handle that.
void S4::Interpreter::Realloc(size_t newCoreSize) {
    const size_t rounded = NextPowerOfTwo(newCoreSize);
    if (rounded <= sizeOfCore) {
        return;
    }

    char *previous = core;

    sizeOfCore = rounded;
    core = new char[sizeOfCore];
    endOfCore = core + sizeOfCore;
    endOfCore[-1] = 0; // the last byte of the new core is always NUL

    std::strcpy(core, previous);

    delete [] previous;
}
예제 #16
0
// Evaluates poly into ret using the baby-step/giant-step power tables.
// Dispatches to simplePolyEval, degPowerOfTwo or PatersonStockmeyer for the
// special shapes of deg(poly), and otherwise splits poly and recurses.
static void 
recursivePolyEval(Ctxt& ret, const ZZX& poly, long k,
		  DynamicCtxtPowers& babyStep, DynamicCtxtPowers& giantStep)
{
  if (deg(poly)<=babyStep.size()) { // small enough: evaluate directly
    simplePolyEval(ret, poly, babyStep);
    return;
  }

  const long degree = deg(poly);
  const long rem = degree % k;            // deg(poly) mod k
  const long nBlocks = divc(degree, k);   // ceil( deg(poly)/k )
  const long pow2 = 1L<<(NextPowerOfTwo(nBlocks)); // pow2 >= nBlocks, so pow2*k >= deg(poly)

  // Special case for deg(poly) = k * 2^e +delta
  if (nBlocks == pow2) {
    degPowerOfTwo(ret, poly, k, babyStep, giantStep);
    return;
  }

  // When deg(poly) = k*(2^e -1) we use the Paterson-Stockmeyer recursion
  if (rem == 0 && nBlocks == pow2-1) {
    PatersonStockmeyer(ret, poly, k, pow2/2, rem, babyStep, giantStep);
    return;
  }

  const long t = pow2/2;

  // In any other case we have kt < deg(poly) < k(2t-1). We then set 
  // u = deg(poly) - k*(t-1) and poly = q*X^u + r with deg(r)<u
  // and recurse on poly = (q-1)*X^u + (X^u+r)

  const long u = degree - k*(t-1);
  ZZX r = trunc(poly, u);      // degree <= u-1
  ZZX q = RightShift(poly, u); // degree == k*(t-1)
  q -= 1;
  SetCoeff(r, u);              // degree == u

  PatersonStockmeyer(ret, q, k, t/2, 0, babyStep, giantStep);

  // Build X^u from the giant-step power u/k and, when u is not a multiple
  // of k, the baby-step power for the remainder.
  Ctxt tmp = giantStep.getPower(u/k);
  if (rem != 0)
    tmp.multiplyBy(babyStep.getPower(rem));
  ret.multiplyBy(tmp);

  recursivePolyEval(tmp, r, k, babyStep, giantStep);
  ret += tmp;
}
예제 #17
0
// Returns the e'th power, computing and storing it on first request.
// A negative entry in v marks a power that has not been computed yet.
long DynamicPtxtPowers::getPower(long e)
{
  // FIXME: Do we want to allow the vector to grow? If so then begin by
  // checking e<v.length() and resizing if not. Currently throws an exception.

  const long idx = e-1;
  if (v.at(idx)>=0) // already available
    return v[idx];

  // Split e = (e-k) + k with k = 2^(NextPowerOfTwo(e)-1), a power of two
  // smaller than e, and build X^e = X^{e-k} * X^k.
  const long k = 1L<<(NextPowerOfTwo(e)-1);

  v[idx] = getPower(e-k);
  v[idx] = MulMod(v[idx], getPower(k), p);

  // bookkeeping: depth of this power and the running multiplication count
  dpth[idx] = max(getDepth(k),getDepth(e-k)) +1;
  nMults++;

  return v[idx];
}
예제 #18
0
// Evaluates the polynomial with coefficients poly[0..nCoeffs-1] into ret.
// The coefficients are split at d = 2^logD into a low and a high part;
// both are evaluated recursively and the high part is multiplied by
// powers[logD] before being added.
static void recursivePolyEval(Ctxt& ret, const Ctxt poly[], long nCoeffs,
			      const Vec<Ctxt>& powers)
{
  if (nCoeffs == 0) { // empty polynomial
    ret.clear();
    return;
  }
  if (nCoeffs < 2) {  // constant polynomial
    ret = poly[0];
    return;
  }

  const long logD = NextPowerOfTwo(nCoeffs)-1;
  const long d = 1L << logD;

  Ctxt high(ZeroCtxtLike, ret);
  recursivePolyEval(high, poly + d, nCoeffs-d, powers); // upper coefficients
  recursivePolyEval(ret, poly, d, powers);              // lower d coefficients

  high.multiplyBy(powers[logD]);
  ret += high;
}
예제 #19
0
// Factors n (n > 1) by trial division and stores the prime-power factors in
// fvec, followed by the internal nodes that the link-building loop creates
// by combining two entries at a time.
void FactorInt(FacVec& fvec, long n)
{
   if (n <= 1) LogicError("internal error: FactorInt(FacVec,long n) with n<=1");

   if (NTL_OVERFLOW(n, 1, 0))
      ResourceError("internal error: FactorInt(FacVec,long n) with n too large");

   // generous initial length; trimmed once the factor count is known
   fvec.SetLength(2*NextPowerOfTwo(n));

   long count = 0;

   // trial division by 2, 3, 4, ... until n is fully factored
   for (long cand = 2; n != 1; cand++) {
      if (n%cand != 0)
         continue;

      fvec[count].q = cand;
      n = n/cand;
      fvec[count].a = 1;
      fvec[count].val = cand;

      // strip the remaining copies of cand, tracking exponent and power
      while (n%cand == 0) {
         n = n/cand;
         (fvec[count].a)++;
         fvec[count].val *= cand;
      }

      fvec[count].link = -1;
      count++;
   }

   fvec.SetLength(2*count-1);

   long first = 0;
   long last = count - 1;

   // combine entries pairwise, appending each combined node at the end
   while (first < last) {
      FindMin(fvec, first, last);
      FindMin(fvec, first+1, last);
      last++;
      fvec[last].link = first;
      fvec[last].val = fvec[first].val * fvec[first+1].val;
      first += 2;
   }
}
예제 #20
0
// Grows and rehashes the scan cache's hash table when it is empty or when
// its load (records per slot, scaled by 0x100) has reached 0xc0. Records
// are relinked into the new table; the old table is freed.
static void ScanCachePrepareInsert(ScanCache* self)
{
  const size_t old_size = self->m_TableSize;

  // Below the load threshold nothing needs to happen.
  if (old_size > 0)
  {
    const int64_t load = 0x100 * self->m_RecordCount / old_size;
    if (load < 0xc0)
      return;
  }

  MemAllocHeap *heap = self->m_Heap;

  // Next table size, with a floor of 64 slots.
  size_t new_size = NextPowerOfTwo(uint32_t(old_size + 1));
  if (new_size < 64)
    new_size = 64;

  ScanCache::Record** old_table = self->m_Table;
  ScanCache::Record** new_table = HeapAllocateArrayZeroed<ScanCache::Record*>(heap, new_size);

  // Walk every chain of the old table and push each record onto the head
  // of its chain in the new table.
  for (size_t slot = 0; slot < old_size; ++slot)
  {
    ScanCache::Record* following = 0;
    for (ScanCache::Record* rec = old_table[slot]; rec; rec = following)
    {
      following = rec->m_Next;
#if ENABLED(USE_SHA1_HASH)
      uint32_t           hash  = rec->m_Key.m_Words.m_C;
#elif ENABLED(USE_FAST_HASH)
      uint32_t           hash  = rec->m_Key.m_Words32[0];
#endif
      uint32_t           index = hash &(new_size - 1);

      rec->m_Next      = new_table[index];
      new_table[index] = rec;
    }
  }

  self->m_TableSize = (uint32_t) new_size;
  self->m_Table     = new_table;

  HeapFree(heap, old_table);
}
예제 #21
0
// For each of the first prime_num FFT primes, converts a to a polynomial
// modulo that prime and stores its FFT representation (size parameter
// NextPowerOfTwo(2*deg_b+1)) in R1[i]. The modulus that was current on
// entry is restored before returning.
void CalculateFFTValues(fftRep *R1, ZZX &a, int &prime_num, int deg_b){

	zz_pBak bak;
	bak.save();

	zz_pX A;

	const long d = 2*deg_b;
	const long k = NextPowerOfTwo(d+1);

	for(int idx=0; idx<prime_num; ++idx){
		zz_p::FFTInit(idx);      // switch to the idx-th FFT prime
		conv(A, a);
		TofftRep(R1[idx], A, k);
	}

	bak.restore();
}
예제 #22
0
// x = a*a, keeping only coefficients 0..n-1; computed through the FFT
// representation. A zero a yields a zero x.
void FFTSqrTrunc(zz_pX& x, const zz_pX& a, long n)
{
   if (IsZero(a)) {
      clear(x);
      return;
   }

   const long prodDeg = 2*deg(a);

   // no point in asking for more coefficients than the square has
   const long outLen = (n > prodDeg + 1) ? (prodDeg + 1) : n;

   const long k = NextPowerOfTwo(prodDeg + 1);
   fftRep R1(INIT_SIZE, k);

   TofftRep(R1, a, k);
   mul(R1, R1, R1);
   FromfftRep(x, R1, 0, outLen-1);
}
예제 #23
0
// x = a*b, keeping only coefficients 0..n-1; computed through the FFT
// representation. If either factor is zero, x is cleared.
void FFTMulTrunc(zz_pX& x, const zz_pX& a, const zz_pX& b, long n)
{
   if (IsZero(a) || IsZero(b)) {
      clear(x);
      return;
   }

   const long prodDeg = deg(a) + deg(b);

   // no point in asking for more coefficients than the product has
   const long outLen = (n > prodDeg + 1) ? (prodDeg + 1) : n;

   const long k = NextPowerOfTwo(prodDeg + 1);
   fftRep R1(INIT_SIZE, k), R2(INIT_SIZE, k);

   TofftRep(R1, a, k);
   TofftRep(R2, b, k);
   mul(R1, R1, R2);
   FromfftRep(x, R1, 0, outLen-1);
}
예제 #24
0
// This procedure assumes that k*(2^e +1) > deg(poly) > k*(2^e -1),
// and that babyStep contains k+ (deg(poly) mod k) powers
static long degPowerOfTwo(const ZZX& poly, long k, DynamicPtxtPowers& babyStep,
			  DynamicPtxtPowers& giantStep, long mod,
			  long& recursiveDepth)
{
  if (deg(poly)<=babyStep.size()) { // Edge condition, use simple eval
    long ret = simplePolyEval(poly, babyStep, mod);
    recursiveDepth = babyStep.getDepth(deg(poly));
    return ret;
  }
  long subDepth1 =0, subDepth2=0;
  long n = deg(poly)/k;        // We assume n=2^e or n=2^e -1
  n = 1L << NextPowerOfTwo(n); // round up to n=2^e
  ZZX r = trunc(poly, (n-1)*k);      // degree <= k(2^e-1)-1
  ZZX q = RightShift(poly, (n-1)*k); // 0 < degree < 2k
  SetCoeff(r, (n-1)*k);              // monic, degree == k(2^e-1)
  q -= 1;
  if (verbose) cerr << ", recursing on "<<r<<" + X^"<<(n-1)*k<<"*"<<q<<endl;

  long ret = PatersonStockmeyer(r, k, n/2, 0,
				babyStep, giantStep, mod, subDepth2);
  if (verbose)
    cerr << "  PatersonStockmeyer("<<r<<") returns "<<ret
	 << ", depth="<<subDepth2<<endl;

  long tmp = simplePolyEval(q, babyStep, mod); // evaluate q
  subDepth1 = babyStep.getDepth(deg(q));
  if (verbose)
    cerr << "  simplePolyEval("<<q<<") returns "<<tmp
	 << ", depth="<<subDepth1<<endl;

  // multiply by X^{k(n-1)} with minimum depth
  for (long i=1; i<n; i*=2) {  
    tmp = MulMod(tmp, giantStep.getPower(i), mod);
    nMults++;
    subDepth1 = max(subDepth1, giantStep.getDepth(i)) +1;
    if (verbose)
      cerr << "    after mult by giantStep.getPower("<<i<< ")="
	   << giantStep.getPower(i)<<" of depth="<< giantStep.getDepth(i)
	   << ",  ret="<<tmp<<" and depth is "<<subDepth1<<endl;
  }
  totalDepth = max(subDepth1, subDepth2);
  return AddMod(ret, tmp, mod); // return q * X^{k(n-1)} + r
}
예제 #25
0
// A = B*C for 2x2 polynomial matrices, computed entry by entry through FFT
// representations. B and C are destroyed: each of their entries is killed
// right after it has been converted.
void mul(zz_pXMatrix& A, zz_pXMatrix& B, zz_pXMatrix& C)
{
   const long degB = deg(B(1,1));
   const long degC = deg(C(1,1));
   const long degA = degB + degC;

   const long k = NextPowerOfTwo(degA+1);

   fftRep B00, B01, B10, B11, C0, C1, acc, term;

   // all four entries of B
   TofftRep(B00, B(0,0), k); B(0,0).kill();
   TofftRep(B01, B(0,1), k); B(0,1).kill();
   TofftRep(B10, B(1,0), k); B(1,0).kill();
   TofftRep(B11, B(1,1), k); B(1,1).kill();

   // first column of C
   TofftRep(C0, C(0,0), k);  C(0,0).kill();
   TofftRep(C1, C(1,0), k);  C(1,0).kill();

   // A(0,0) = B(0,0)*C(0,0) + B(0,1)*C(1,0)
   mul(acc, B00, C0);
   mul(term, B01, C1);
   add(acc, acc, term);
   FromfftRep(A(0,0), acc, 0, degA);

   // A(1,0) = B(1,0)*C(0,0) + B(1,1)*C(1,0)
   mul(acc, B10, C0);
   mul(term, B11, C1);
   add(acc, acc, term);
   FromfftRep(A(1,0), acc, 0, degA);

   // second column of C, reusing C0 and C1
   TofftRep(C0, C(0,1), k);  C(0,1).kill();
   TofftRep(C1, C(1,1), k);  C(1,1).kill();

   // A(0,1) = B(0,0)*C(0,1) + B(0,1)*C(1,1)
   mul(acc, B00, C0);
   mul(term, B01, C1);
   add(acc, acc, term);
   FromfftRep(A(0,1), acc, 0, degA);

   // A(1,1) = B(1,0)*C(0,1) + B(1,1)*C(1,1)
   mul(acc, B10, C0);
   mul(term, B11, C1);
   add(acc, acc, term);
   FromfftRep(A(1,1), acc, 0, degA);
}
예제 #26
0
bool SFB::Audio::RingBuffer::Allocate(const AudioFormat& format, size_t capacityFrames)
{
	// Only non-interleaved formats are supported
	if(format.IsInterleaved())
		return false;

	Deallocate();

	// Round up to the next power of two
	capacityFrames = NextPowerOfTwo((uint32_t)capacityFrames);

	mFormat = format;

	mCapacityFrames = capacityFrames;
	mCapacityFramesMask = capacityFrames - 1;

	size_t capacityBytes = format.FrameCountToByteCount(capacityFrames);

	// One memory allocation holds everything- first the pointers followed by the deinterleaved channels
	size_t allocationSize = (capacityBytes + sizeof(uint8_t *)) * format.mChannelsPerFrame;
	uint8_t *memoryChunk = (uint8_t *)malloc(allocationSize);
	if(nullptr == memoryChunk)
		return false;

	// Zero the entire allocation
	memset(memoryChunk, 0, allocationSize);

	// Assign the pointers and channel buffers
	mBuffers = (uint8_t **)memoryChunk;
	memoryChunk += format.mChannelsPerFrame * sizeof(uint8_t *);
	for(UInt32 i = 0; i < format.mChannelsPerFrame; ++i) {
		mBuffers[i] = memoryChunk;
		memoryChunk += capacityBytes;
	}

	mReadPointer = 0;
	mWritePointer = 0;

	return true;
}
예제 #27
0
void AudioTee::start() {
  if (mInputDevice.mID == kAudioDeviceUnknown || mOutputDevice.mID == kAudioDeviceUnknown) return;
  if (mInputDevice.mFormat.mSampleRate != mOutputDevice.mFormat.mSampleRate) {
    printf("Error in AudioTee::Start() - sample rate mismatch: %f / %f\n", mInputDevice.mFormat.mSampleRate, mOutputDevice.mFormat.mSampleRate);
    return;
  }
  mWorkBuf = new Byte[mInputDevice.mBufferSizeFrames * mInputDevice.mFormat.mBytesPerFrame];
  memset(mWorkBuf, 0, mInputDevice.mBufferSizeFrames * mInputDevice.mFormat.mBytesPerFrame);
  UInt32 framesInHistoryBuffer = NextPowerOfTwo(mInputDevice.mFormat.mSampleRate * mSecondsInHistoryBuffer);
  mHistoryBufferMaxByteSize = mInputDevice.mFormat.mBytesPerFrame * framesInHistoryBuffer;
  mHistBuf = new CARingBuffer();
  mHistBuf->Allocate(2, mInputDevice.mFormat.mBytesPerFrame, framesInHistoryBuffer);
  printf("Initializing history buffer with byte capacity %u — %f seconds at %f kHz", mHistoryBufferMaxByteSize, (mHistoryBufferMaxByteSize / mInputDevice.mFormat.mSampleRate / (4 * 2)), mInputDevice.mFormat.mSampleRate);
  printf("Initializing work buffer with mBufferSizeFrames:%u and mBytesPerFrame %u\n", mInputDevice.mBufferSizeFrames, mInputDevice.mFormat.mBytesPerFrame);
  mInputIOProcID = NULL;
  AudioDeviceCreateIOProcID(mInputDevice.mID, InputIOProc, this, &mInputIOProcID);
  AudioDeviceStart(mInputDevice.mID, mInputIOProcID);
  mOutputIOProc = OutputIOProc;
  mOutputIOProcID = NULL;
  AudioDeviceCreateIOProcID(mOutputDevice.mID, mOutputIOProc, this, &mOutputIOProcID);
  AudioDeviceStart(mOutputDevice.mID, mOutputIOProcID);
}
예제 #28
0
// Main entry point: evaluates an encrypted polynomial on an encrypted input,
// leaving sum_i poly[i] * x^i in ret. The coefficients are cut into up to
// three chunks of d = 2^logD coefficients, each evaluated recursively.
void polyEval(Ctxt& ret, const Vec<Ctxt>& poly, const Ctxt& x)
{
  const long nCoeffs = poly.length();

  if (nCoeffs==0) {        // empty polynomial
    ret.clear();
    return;
  }
  if (nCoeffs<=1) {        // constant polynomial
    ret = poly[0];
    return;
  }
  long deg = nCoeffs-1;

  const long logD = NextPowerOfTwo(divc(poly.length(),3));
  const long d = 1L << logD;

  // We have d <= deg(poly) < 3d
  assert(d <= deg && deg < 3*d);

  // powers[i] = x^{2^i}: every entry starts as x and is squared up in turn
  Vec<Ctxt> powers(INIT_SIZE, logD+1, x);
  for (long i=1; i<=logD; i++) {
    if (i>1) powers[i] = powers[i-1];
    powers[i].square();
  }

  // Compute in three parts p0(X) + ( p1(X) + p2(X)*X^d )*X^d
  Ctxt tmp(ZeroCtxtLike, ret);
  recursivePolyEval(ret, &poly[d], min(d,poly.length()-d), powers); // p1(X)

  const bool hasThirdPart = (poly.length() > 2*d);
  if (hasThirdPart) {
    recursivePolyEval(tmp, &poly[2*d], poly.length()-2*d, powers);  // p2(X)
    tmp.multiplyBy(powers[logD]);
    ret += tmp;
  }
  ret.multiplyBy(powers[logD]); // ( p1(X) + p2(X)*X^d )*X^d

  recursivePolyEval(tmp, &poly[0], d, powers);                      // p0(X)
  ret += tmp;
}
예제 #29
0
// Releases any previous storage and allocates a buffer of
// NextPowerOfTwo((uint32_t)capacityBytes) bytes. Returns false when the
// allocation throws a std::exception, true otherwise.
bool SFB::RingBuffer::Allocate(size_t capacityBytes)
{
	Deallocate();

	const size_t rounded = NextPowerOfTwo((uint32_t)capacityBytes);

	mCapacityBytes = rounded;
	mCapacityBytesMask = rounded - 1;

	try {
		mBuffer = new uint8_t [mCapacityBytes];
	}
	catch(const std::exception&) {
		// allocation failed; report it through the return value
		return false;
	}

	// Start with an empty buffer
	mReadPointer = 0;
	mWritePointer = 0;

	return true;
}
예제 #30
0
// Precomputes the tables used by the Bluestein transform for size n:
//   powers[i]     = root^{i^2 mod 2n}
//   powers_aux[i] = the MulModPrecon preconditioner for powers[i]
//   Rb            = FFT representation of b, where b[n-1] = 1 and
//                   b[n-1+i] = b[n-1-i] = root^{-(i^2 mod 2n)}
void BluesteinInit(long n, const zz_p& root, zz_pX& powers, 
                   Vec<mulmod_precon_t>& powers_aux, fftRep& Rb)
{
  const long modulus = zz_p::modulus();
  const long twoN = 2*n;

  zz_p one; one=1;
  powers.SetMaxLength(n);

  SetCoeff(powers,0,one);
  for (long i=1; i<n; i++) {
    const long sq = MulMod(i, i, twoN);  // i^2 mod 2n
    SetCoeff(powers,i, power(root,sq));  // powers[i] = root^{i^2}
  }

  // powers_aux tracks powers
  powers_aux.SetLength(n);
  for (long i=0; i<n; i++)
    powers_aux[i] = PrepMulModPrecon(rep(powers[i]), modulus);

  const long k = NextPowerOfTwo(twoN-1);
  const long len = 1L << k; // len = 2^k

  Rb.SetSize(k);
  zz_pX b(INIT_SIZE, len);

  const zz_p rootInv = inv(root);
  SetCoeff(b,n-1,one); // b[n-1] = 1
  for (long i=1; i<n; i++) {
    const long sq = MulMod(i, i, twoN); // i^2 mod 2n
    const zz_p bi = power(rootInv,sq);
    SetCoeff(b,n-1+i, bi); // b[n-1+i] = b[n-1-i] = root^{-i^2}
    SetCoeff(b,n-1-i,bi);
  }

  TofftRep(Rb, b, k);
}