/*
 * Picks the texture size to allocate for a surface of size aSize.
 *
 * When the GL context reports that it can upload non-power-of-two
 * textures, aSize is returned untouched.  Otherwise width and height
 * are each passed through NextPowerOfTwo().
 */
static IntSize
CalculatePOTSize(const IntSize& aSize, GLContext* gl)
{
  const bool mustRoundUp = !gl->CanUploadNonPowerOfTwo();
  if (mustRoundUp) {
    return IntSize(NextPowerOfTwo(aSize.width),
                   NextPowerOfTwo(aSize.height));
  }

  return aSize;
}
// ------------------------------------------------------------------------ S4::Interpreter::Interpreter(const char *program) { static const char *coreWords = "42 ."; dictionary = new Dictionary(); program = program ? program : ""; sizeOfCore = NextPowerOfTwo(std::strlen(coreWords) + 1 + std::strlen(program)); core = new char[sizeOfCore]; endOfCore = core + sizeOfCore; *(endOfCore - 1) = 0; // load primitives AddPrimitive(new S4::Primitive::Add()); AddPrimitive(new S4::Primitive::CloseBrace()); AddPrimitive(new S4::Primitive::Dot()); AddPrimitive(new S4::Primitive::Dup()); AddPrimitive(new S4::Primitive::OpenBrace()); AddPrimitive(new S4::Primitive::StackHeight()); // put core words at the beginning of memory and the program after it core = StrCat(coreWords, " ", program); printf(".core:\t%s\n", core); // and initialize the program counter programCounter = core; }
void Buffer::Allocate(unsigned size_, bool keepold) { char* newbuffer; if (size_ <= size) return; size_ = NextPowerOfTwo(size_); if (buffer == array || preallocated) newbuffer = (char*) malloc(size_); else newbuffer = (char*) realloc(buffer, size_); ASSERT(newbuffer != NULL); if (keepold && length > 0) { if (buffer == array) memcpy(newbuffer, buffer, length); } buffer = newbuffer; size = size_; preallocated = false; }
void Buffer::Allocate(unsigned size_, bool keepold) { char* newbuffer; if (size_ <= size) return; size_ = NextPowerOfTwo(size_); if (buffer == array || preallocated) newbuffer = (char*) malloc(size_); else newbuffer = (char*) realloc(buffer, size_); if (newbuffer == NULL) { throw std::bad_alloc(); // in case of exceptions are disabled STOP_FAIL(1, "Out of memory error"); } if (keepold && length > 0) { if (buffer == array || preallocated) memcpy(newbuffer, buffer, length); } buffer = newbuffer; size = size_; preallocated = false; }
void CARingBuffer::Allocate(int nChannels, UInt32 bytesPerFrame, UInt32 capacityFrames) { Deallocate(); capacityFrames = NextPowerOfTwo(capacityFrames); mNumberChannels = nChannels; mBytesPerFrame = bytesPerFrame; mCapacityFrames = capacityFrames; mCapacityFramesMask = capacityFrames - 1; mCapacityBytes = bytesPerFrame * capacityFrames; // put everything in one memory allocation, first the pointers, then the deinterleaved channels UInt32 allocSize = (mCapacityBytes + sizeof(Byte *)) * nChannels; Byte *p = (Byte *)CA_malloc(allocSize); memset(p, 0, allocSize); mBuffers = (Byte **)p; p += nChannels * sizeof(Byte *); for (int i = 0; i < nChannels; ++i) { mBuffers[i] = p; p += mCapacityBytes; } for (UInt32 i = 0; i<kGeneralRingTimeBoundsQueueSize; ++i) { mTimeBoundsQueue[i].mStartTime = 0; mTimeBoundsQueue[i].mEndTime = 0; mTimeBoundsQueue[i].mUpdateCounter = 0; } mTimeBoundsQueuePtr = 0; }
// Transforms x in place.
//
// Steps, as performed below:
//   1. multiply each coefficient x[i] by powers[i] (using the precomputed
//      powers_aux[i] for MulModPrecon),
//   2. multiply by Rb in FFT representation of size parameter
//      k = NextPowerOfTwo(2n-1),
//   3. extract coefficients n-1 .. 2(n-1) back into x,
//   4. multiply each coefficient by powers[i] once more.
//
// A zero x is left alone; n <= 0 clears x. The `root` argument is not
// read by this function.
void BluesteinFFT(zz_pX& x, long n, const zz_p& root,
                  const zz_pX& powers, const Vec<mulmod_precon_t>& powers_aux,
                  const fftRep& Rb)
{
  // FHE_TIMER_START;

  if (IsZero(x))
    return;

  if (n <= 0) {
    clear(x);
    return;
  }

  const long modulus = zz_p::modulus();

  // pre-multiplication by powers[i]
  for (long i = 0, top = deg(x); i <= top; i++) {
    x[i].LoopHole() =
      MulModPrecon(rep(x[i]), rep(powers[i]), modulus, powers_aux[i]);
  }
  x.normalize();

  const long k = NextPowerOfTwo(2*n-1);
  fftRep& Ra = Cmodulus::getScratch_fftRep(k);

  TofftRep(Ra, x, k);
  mul(Ra, Ra, Rb);                  // multiply in FFT representation
  FromfftRep(x, Ra, n-1, 2*(n-1));  // then convert back

  // post-multiplication by powers[i]
  for (long i = 0, top = deg(x); i <= top; i++) {
    x[i].LoopHole() =
      MulModPrecon(rep(x[i]), rep(powers[i]), modulus, powers_aux[i]);
  }
  x.normalize();
}
// Stores the shadow map size. Values below 1 are treated as 1, and the
// result is passed through NextPowerOfTwo() before being stored.
void Light::SetShadowMapSize(int size)
{
    int clamped = (size < 1) ? 1 : size;
    shadowMapSize = NextPowerOfTwo(clamped);
}
// This procedure assumes that k*(2^e +1) > deg(poly) > k*(2^e -1), // and that babyStep contains >= k + (deg(poly) mod k) powers static void degPowerOfTwo(Ctxt& ret, const ZZX& poly, long k, DynamicCtxtPowers& babyStep, DynamicCtxtPowers& giantStep) { if (deg(poly)<=babyStep.size()) { // Edge condition, use simple eval simplePolyEval(ret, poly, babyStep); return; } long n = deg(poly)/k; // We assume n=2^e or n=2^e -1 n = 1L << NextPowerOfTwo(n); // round up to n=2^e ZZX r = trunc(poly, (n-1)*k); // degree <= k(2^e-1)-1 ZZX q = RightShift(poly, (n-1)*k); // 0 < degree < 2k SetCoeff(r, (n-1)*k); // monic, degree == k(2^e-1) q -= 1; PatersonStockmeyer(ret, r, k, n/2, 0, babyStep, giantStep); Ctxt tmp(ret.getPubKey(), ret.getPtxtSpace()); simplePolyEval(tmp, q, babyStep); // evaluate q // multiply by X^{k(n-1)} with minimum depth for (long i=1; i<n; i*=2) { tmp.multiplyBy(giantStep.getPower(i)); } ret += tmp; }
// Initializes a build queue from `config` and starts its worker threads.
//
// queue  : queue object to initialize; receives a copy of *config
// config : settings; m_MaxExpensiveCount must be in (0, m_ThreadCount]
//
// The thread count stored in the queue is clamped to kMaxBuildThreads. The
// per-thread setup loop now uses that clamped value; it previously iterated
// over the unclamped config->m_ThreadCount, indexing m_ThreadState and
// m_Threads past the limit the clamp is meant to enforce. Both heap arrays
// are now checked after allocation.
void BuildQueueInit(BuildQueue* queue, const BuildQueueConfig* config)
{
  CHECK(config->m_MaxExpensiveCount > 0 && config->m_MaxExpensiveCount <= config->m_ThreadCount);

  MutexInit(&queue->m_Lock);
  CondInit(&queue->m_WorkAvailable);

  // Compute queue capacity. Allocate space for a power of two number of
  // indices that's at least one larger than the max number of nodes. Because
  // the queue is treated as a ring buffer, we want W=R to mean an empty
  // buffer.
  uint32_t capacity = NextPowerOfTwo(config->m_MaxNodes + 1);

  MemAllocHeap* heap = config->m_Heap;

  queue->m_Queue              = HeapAllocateArray<int32_t>(heap, capacity);
  queue->m_QueueReadIndex     = 0;
  queue->m_QueueWriteIndex    = 0;
  queue->m_QueueCapacity      = capacity;
  queue->m_Config             = *config;
  queue->m_PendingNodeCount   = 0;
  queue->m_FailedNodeCount    = 0;
  queue->m_QuitSignalled      = false;
  queue->m_ExpensiveRunning   = 0;
  queue->m_ExpensiveWaitCount = 0;
  queue->m_ExpensiveWaitList  = HeapAllocateArray<NodeState*>(heap, capacity);

  CHECK(queue->m_Queue);
  CHECK(queue->m_ExpensiveWaitList);

  if (queue->m_Config.m_ThreadCount > kMaxBuildThreads)
  {
    Log(kWarning, "too many build threads (%d) - clamping to %d",
        queue->m_Config.m_ThreadCount, kMaxBuildThreads);

    queue->m_Config.m_ThreadCount = kMaxBuildThreads;
  }

  Log(kDebug, "build queue initialized; ring buffer capacity = %u", queue->m_QueueCapacity);

  // Block all signals on the main thread.
  SignalBlockThread(true);
  SignalHandlerSetCondition(&queue->m_WorkAvailable);

  // Create build threads. Thread state 0 is initialized but no thread is
  // started for it. Use the clamped count from the queue's own config.
  for (int i = 0, thread_count = queue->m_Config.m_ThreadCount; i < thread_count; ++i)
  {
    ThreadState* thread_state = &queue->m_ThreadState[i];

    ThreadStateInit(thread_state, queue, MB(64), MB(32), i);

    if (i > 0)
    {
      Log(kDebug, "starting build thread %d", i);
      queue->m_Threads[i] = ThreadStart(BuildThreadRoutine, thread_state);
    }
  }
}
/** * \brief Loads a texture from an image file and fits into a GL-compatible size. * \param fileName The name of the image file. * \param filtering True for linear filtering, false for nearest-neighbor. * \param realW Returns the real width of the texture. * \param realH Returns the real height of the texture. * \return The OpenGL texture ID. */ long SDLGL_LoadTextureFromFileBestFit( std::string fileName, bool filtering, unsigned long &realW, unsigned long &realH ) { GLuint theTexture; SDL_Surface *loadSurface, *theSurface, *convertedSurface; Uint32 rmask, gmask, bmask, amask; loadSurface = IMG_Load( fileName.c_str() ); if ( loadSurface ) { #if SDL_BYTEORDER == SDL_BIG_ENDIAN rmask = 0xff000000; gmask = 0x00ff0000; bmask = 0x0000ff00; amask = 0x000000ff; #else rmask = 0x000000ff; gmask = 0x0000ff00; bmask = 0x00ff0000; amask = 0xff000000; #endif theSurface = SDL_CreateRGBSurface( SDL_SWSURFACE | SDL_SRCALPHA, NextPowerOfTwo(loadSurface->w), NextPowerOfTwo(loadSurface->h), 32, rmask, gmask, bmask, amask ); SDL_FillRect( theSurface, NULL, SDL_MapRGBA( theSurface->format, 0,0,0,255 ) ); convertedSurface = SDL_ConvertSurface( loadSurface, theSurface->format, SDL_SWSURFACE | SDL_SRCALPHA ); MoveTexture( convertedSurface, theSurface ); } else theSurface = NULL; if ( theSurface ) { glGenTextures( 1, &theTexture); glBindTexture( GL_TEXTURE_2D, theTexture ); if ( filtering ) { glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR ); glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR ); } else { glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST ); glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST ); } realW = loadSurface->w; realH = loadSurface->h; glTexImage2D( GL_TEXTURE_2D, 0, 4, theSurface->w, theSurface->h, 0, GL_RGBA, GL_UNSIGNED_BYTE, theSurface->pixels ); SDL_FreeSurface( theSurface ); SDL_FreeSurface( loadSurface ); SDL_FreeSurface( convertedSurface ); return theTexture; } else return 0; // GLspeak for "no 
texture" }
// Constructor: it is assumed that zms is already set with m>1 // If q == 0, then the current context is used template <class type> Cmod<type>:: Cmod(const PAlgebra &zms, const zz &qq, const zz &rt) { assert(zms.getM()>1); bool explicitModulus = true; if (qq == 0) { q = zp::modulus(); explicitModulus = false; } else q = qq; zMStar = &zms; root = rt; zz mm; mm = zms.getM(); m_inv = InvMod(mm, q); zz_pBak bak; if (explicitModulus) { bak.save(); // backup the current modulus context = BuildContext(q, NextPowerOfTwo(zms.getM()) + 1); context.restore(); // set NTL's current modulus to q } else context.save(); if (IsZero(root)) { // Find a 2m-th root of unity modulo q, if not given zp rtp; long e = 2*zms.getM(); FindPrimitiveRoot(rtp,e); // NTL routine, relative to current modulus if (IsZero(rtp)) // sanity check Error("Cmod::compRoots(): no 2m'th roots of unity mod q"); root = rep(rtp); } rInv = InvMod(root,q); // set rInv = root^{-1} mod q // Allocate memory (relative to current modulus that was defined above). // These objects will be initialized when anyone calls FFT/iFFT. zpx phimx_poly; conv(phimx_poly, zms.getPhimX()); powers = new zpx(); Rb = new fftrep(); Ra = new fftrep(); ipowers = new zpx(); iRb = new fftrep(); phimx = new zpxModulus(phimx_poly); scratch = new zpx(); }
// prime power solver // A is an n x n matrix, we compute its inverse mod p^r. An error is raised // if A is not inverible mod p. zz_p::modulus() is assumed to be p^r, for // p prime, r >= 1. Also zz_pE::modulus() is assumed to be initialized. void ppInvert(mat_zz_pE& X, const mat_zz_pE& A, long p, long r) { if (r == 1) { // use native inversion from NTL inv(X, A); // X = A^{-1} return; } // begin by inverting A modulo p // convert to ZZX for a safe transaltion to mod-p objects vector< vector<ZZX> > tmp; convert(tmp, A); { // open a new block for mod-p computation ZZX G; convert(G, zz_pE::modulus()); zz_pBak bak_pr; bak_pr.save(); // backup the mod-p^r moduli zz_pEBak bak_prE; bak_prE.save(); zz_p::init(p); // Set the mod-p moduli zz_pE::init(conv<zz_pX>(G)); mat_zz_pE A1, Inv1; convert(A1, tmp); // Recover A as a mat_zz_pE object modulo p inv(Inv1, A1); // Inv1 = A^{-1} (mod p) convert(tmp, Inv1); // convert to ZZX for transaltion to a mod-p^r object } // mod-p^r moduli restored on desctuction of bak_pr and bak_prE mat_zz_pE XX; convert(XX, tmp); // XX = A^{-1} (mod p) // Now lift the solution modulo p^r // Compute the "correction factor" Z, s.t. XX*A = I - p*Z (mod p^r) long n = A.NumRows(); const mat_zz_pE I = ident_mat_zz_pE(n); // identity matrix mat_zz_pE Z = I - XX*A; convert(tmp, Z); // Conver to ZZX to divide by p for (long i=0; i<n; i++) for (long j=0; j<n; j++) tmp[i][j] /= p; convert(Z, tmp); // convert back to a mod-p^r object // The inverse of A is ( I+(pZ)+(pZ)^2+...+(pZ)^{r-1} )*XX (mod p^r). We use // O(log r) products to copmute it as (I+pZ)* (I+(pZ)^2)* (I+(pZ)^4)*...* XX long e = NextPowerOfTwo(r); // 2^e is smallest power of two >= r Z *= p; // = pZ mat_zz_pE prod = I + Z; // = I + pZ for (long i=1; i<e; i++) { sqr(Z, Z); // = (pZ)^{2^i} prod *= (I+Z); // = sum_{j=0}^{2^{i+1}-1} (pZ)^j } mul(X, prod, XX); // X = A^{-1} mod p^r assert(X*A == I); }
// Returns the e'th power of X, computing it as needed Ctxt& DynamicCtxtPowers::getPower(long e) { if (v.at(e-1).isEmpty()) { // Not computed yet, compute it now long k = 1L<<(NextPowerOfTwo(e)-1); // largest power of two smaller than e v[e-1] = getPower(e-k); // compute X^e = X^{e-k} * X^k v[e-1].multiplyBy(getPower(k)); v[e-1].modDownToLevel(v[e-1].findBaseLevel()); // mod-switch down to base level } return v[e-1]; }
// Builds the modulus helper for reducing polynomials of degree < m modulo
// f (deg f = n, with m > n asserted).
//
// The ordinary modulus fm is always built. When m - n > 10 and m < 2n,
// specialLogic is enabled and two FFT representations (R0, R1) are
// precomputed from a truncated inverse of the reversed f and from f itself.
zz_pXModulus1::zz_pXModulus1(long _m, const zz_pX& _f)
  : m(_m), f(_f), n(deg(f))
{
   assert(m > n);

   specialLogic = (m - n > 10 && m < 2*n);
   build(fm, f);

   if (!specialLogic)
      return;

   zz_pX revF, invRevF, revInv;

   LocalCopyReverse(revF, f, 0, n);
   InvTrunc(invRevF, revF, m-n);
   LocalCopyReverse(revInv, invRevF, 0, m-n-1);

   k  = NextPowerOfTwo(2*(m-1-n)+1);
   k1 = NextPowerOfTwo(n);

   TofftRep(R0, revInv, k);
   TofftRep(R1, f, k1);
}
// ------------------------------------------------------------------------ void S4::Interpreter::Realloc(size_t newCoreSize) { newCoreSize = NextPowerOfTwo(newCoreSize); if (newCoreSize > sizeOfCore) { char *oldCore = core; sizeOfCore = newCoreSize; core = new char[sizeOfCore]; endOfCore = core + sizeOfCore; *(endOfCore - 1) = 0; std::strcpy(core, oldCore); delete [] oldCore; } }
// Evaluates poly using the baby-step/giant-step power tables, leaving the
// result in ret.
//
// Dispatch, in order:
//   - deg(poly) <= babyStep.size(): simple evaluation;
//   - n == t (n = ceil(deg/k), t = 2^NextPowerOfTwo(n)): degPowerOfTwo();
//   - n == t-1 and deg(poly) divisible by k: PatersonStockmeyer();
//   - otherwise: split poly = q*X^u + r, evaluate (q-1) with
//     PatersonStockmeyer(), multiply by X^u, and recurse on X^u + r.
static void recursivePolyEval(Ctxt& ret, const ZZX& poly, long k,
                              DynamicCtxtPowers& babyStep, DynamicCtxtPowers& giantStep)
{
  if (deg(poly)<=babyStep.size()) { // Edge condition, use simple eval
    simplePolyEval(ret, poly, babyStep);
    return;
  }

  long delta = deg(poly) % k; // deg(poly) mod k
  long n = divc(deg(poly),k); // ceil( deg(poly)/k )
  long t = 1L<<(NextPowerOfTwo(n)); // t >= n, so t*k >= deg(poly)

  // Special case for deg(poly) = k * 2^e +delta
  if (n==t) {
    degPowerOfTwo(ret, poly, k, babyStep, giantStep);
    return;
  }

  // When deg(poly) = k*(2^e -1) we use the Paterson-Stockmeyer recursion
  if (n == t-1 && delta==0) {
    PatersonStockmeyer(ret, poly, k, t/2, delta, babyStep, giantStep);
    return;
  }

  t = t/2;

  // In any other case we have kt < deg(poly) < k(2t-1). We then set
  // u = deg(poly) - k*(t-1) and poly = q*X^u + r with deg(r)<u
  // and recurse on poly = (q-1)*X^u + (X^u+r)

  long u = deg(poly) - k*(t-1);
  ZZX r = trunc(poly, u); // degree <= u-1
  ZZX q = RightShift(poly, u); // degree == k*(t-1)
  q -= 1;
  SetCoeff(r, u); // degree == u

  PatersonStockmeyer(ret, q, k, t/2, 0, babyStep, giantStep);

  // tmp first holds X^u (giant-step part times, if needed, a baby-step part)
  Ctxt tmp = giantStep.getPower(u/k);
  if (delta!=0) { // if u is not divisible by k then compute it
    tmp.multiplyBy(babyStep.getPower(delta));
  }
  ret.multiplyBy(tmp);

  // tmp is then reused as the output of the recursive call on X^u + r
  recursivePolyEval(tmp, r, k, babyStep, giantStep);
  ret += tmp;
}
// Returns the e'th power, computing it as needed long DynamicPtxtPowers::getPower(long e) { // FIXME: Do we want to allow the vector to grow? If so then begin by // checking e<v.length() and resizing if not. Currently throws an exception. if (v.at(e-1)<0) { // Not computed yet, compute it now long k = 1L<<(NextPowerOfTwo(e)-1); // largest power of two smaller than e v[e-1] = getPower(e-k); // compute X^e = X^{e-k} * X^k v[e-1] = MulMod(v[e-1], getPower(k), p); dpth[e-1] = max(getDepth(k),getDepth(e-k)) +1; nMults++; } return v[e-1]; }
// Evaluates the polynomial with encrypted coefficients poly[0..nCoeffs-1],
// leaving the result in ret.
//
// The coefficients are split at d = 2^logD with
// logD = NextPowerOfTwo(nCoeffs)-1; the low and high parts are evaluated
// recursively and the high part is multiplied by powers[logD] before the
// two are added.
static void recursivePolyEval(Ctxt& ret, const Ctxt poly[], long nCoeffs,
                              const Vec<Ctxt>& powers)
{
  // edge conditions
  if (nCoeffs == 0) {   // empty polynomial
    ret.clear();
    return;
  }
  if (nCoeffs <= 1) {   // constant polynomial
    ret = poly[0];
    return;
  }

  const long logD = NextPowerOfTwo(nCoeffs) - 1;
  const long half = 1L << logD;

  Ctxt high(ZeroCtxtLike, ret);
  recursivePolyEval(high, poly + half, nCoeffs - half, powers); // upper part
  recursivePolyEval(ret, poly, half, powers);                   // lower part

  high.multiplyBy(powers[logD]);
  ret += high;
}
// Factors n by trial division and stores the result in fvec.
//
// The first NumFactors entries are the prime-power factors (prime q,
// exponent a, value q^a, link -1). The remaining entries are products
// formed by repeatedly combining the two entries selected by FindMin();
// each such entry's `link` holds the index of the first of its two inputs.
// The final length of fvec is 2*NumFactors-1.
//
// n <= 1 is reported as a logic error, an overly large n as a resource
// error.
void FactorInt(FacVec& fvec, long n)
{
   if (n <= 1)
      LogicError("internal error: FactorInt(FacVec,long n) with n<=1");

   if (NTL_OVERFLOW(n, 1, 0))
      ResourceError("internal error: FactorInt(FacVec,long n) with n too large");

   // initial length, before the number of factors is known
   fvec.SetLength(2*NextPowerOfTwo(n));

   long NumFactors = 0;

   for (long q = 2; n != 1; q++) {
      if (n%q != 0)
         continue;

      // q divides n: strip out every power of q
      fvec[NumFactors].q = q;
      fvec[NumFactors].a = 0;
      fvec[NumFactors].val = 1;
      do {
         n = n/q;
         (fvec[NumFactors].a)++;
         fvec[NumFactors].val *= q;
      } while (n%q == 0);

      fvec[NumFactors].link = -1;
      NumFactors++;
   }

   fvec.SetLength(2*NumFactors-1);

   // combine entries pairwise, appending each product at the end
   for (long lo = 0, hi = NumFactors - 1; lo < hi; lo += 2) {
      FindMin(fvec, lo, hi);
      FindMin(fvec, lo+1, hi);
      hi++;
      fvec[hi].link = lo;
      fvec[hi].val = fvec[lo].val * fvec[lo+1].val;
   }
}
static void ScanCachePrepareInsert(ScanCache* self) { // Check if a rehash is needed. size_t old_size = self->m_TableSize; if (old_size > 0) { int64_t load = 0x100 * self->m_RecordCount / old_size; if (load < 0xc0) return; } MemAllocHeap *heap = self->m_Heap; size_t new_size = NextPowerOfTwo(uint32_t(old_size + 1)); if (new_size < 64) new_size = 64; ScanCache::Record** old_table = self->m_Table; ScanCache::Record** new_table = HeapAllocateArrayZeroed<ScanCache::Record*>(heap, new_size); for (size_t i = 0; i < old_size; ++i) { ScanCache::Record* r = old_table[i]; while (r) { ScanCache::Record *next = r->m_Next; #if ENABLED(USE_SHA1_HASH) uint32_t hash = r->m_Key.m_Words.m_C; #elif ENABLED(USE_FAST_HASH) uint32_t hash = r->m_Key.m_Words32[0]; #endif uint32_t index = hash &(new_size - 1); r->m_Next = new_table[index]; new_table[index] = r; r = next; } } self->m_TableSize = (uint32_t) new_size; self->m_Table = new_table; HeapFree(heap, old_table); }
// Fills R1[0..prime_num-1] with FFT representations of `a`, one per FFT
// prime index.
//
// For each index i the zz_p modulus is switched with zz_p::FFTInit(i), `a`
// is converted to a zz_pX and transformed with size parameter
// k = NextPowerOfTwo(2*deg_b + 1). The modulus in effect on entry is saved
// first and restored before returning.
void CalculateFFTValues(fftRep *R1, ZZX &a, int &prime_num, int deg_b)
{
	zz_pBak bak;
	bak.save();

	const long d = 2*deg_b;
	const long k = NextPowerOfTwo(d+1);

	zz_pX A;
	fftRep *out = R1;
	for (int i = 0; i < prime_num; i++, out++) {
		zz_p::FFTInit(i);
		conv(A, a);
		TofftRep(*out, A, k);
	}

	bak.restore();
}
// x = a^2, keeping only coefficients 0..n-1, computed via FFT.
//
// A zero `a` clears x. n is capped at 2*deg(a)+1, the number of
// coefficients of the full square.
void FFTSqrTrunc(zz_pX& x, const zz_pX& a, long n)
{
   if (IsZero(a)) {
      clear(x);
      return;
   }

   const long d = 2*deg(a);
   const long fullLen = d + 1;

   if (n > fullLen)
      n = fullLen;

   const long k = NextPowerOfTwo(fullLen);

   fftRep R1(INIT_SIZE, k);

   TofftRep(R1, a, k);
   mul(R1, R1, R1);
   FromfftRep(x, R1, 0, n-1);
}
// x = a * b, keeping only coefficients 0..n-1, computed via FFT.
//
// If either operand is zero, x is cleared. n is capped at
// deg(a)+deg(b)+1, the number of coefficients of the full product.
void FFTMulTrunc(zz_pX& x, const zz_pX& a, const zz_pX& b, long n)
{
   if (IsZero(a) || IsZero(b)) {
      clear(x);
      return;
   }

   const long d = deg(a) + deg(b);
   const long fullLen = d + 1;

   if (n > fullLen)
      n = fullLen;

   const long k = NextPowerOfTwo(fullLen);

   fftRep R1(INIT_SIZE, k), R2(INIT_SIZE, k);

   TofftRep(R1, a, k);
   TofftRep(R2, b, k);
   mul(R1, R1, R2);
   FromfftRep(x, R1, 0, n-1);
}
// This procedure assumes that k*(2^e +1) > deg(poly) > k*(2^e -1), // and that babyStep contains k+ (deg(poly) mod k) powers static long degPowerOfTwo(const ZZX& poly, long k, DynamicPtxtPowers& babyStep, DynamicPtxtPowers& giantStep, long mod, long& recursiveDepth) { if (deg(poly)<=babyStep.size()) { // Edge condition, use simple eval long ret = simplePolyEval(poly, babyStep, mod); recursiveDepth = babyStep.getDepth(deg(poly)); return ret; } long subDepth1 =0, subDepth2=0; long n = deg(poly)/k; // We assume n=2^e or n=2^e -1 n = 1L << NextPowerOfTwo(n); // round up to n=2^e ZZX r = trunc(poly, (n-1)*k); // degree <= k(2^e-1)-1 ZZX q = RightShift(poly, (n-1)*k); // 0 < degree < 2k SetCoeff(r, (n-1)*k); // monic, degree == k(2^e-1) q -= 1; if (verbose) cerr << ", recursing on "<<r<<" + X^"<<(n-1)*k<<"*"<<q<<endl; long ret = PatersonStockmeyer(r, k, n/2, 0, babyStep, giantStep, mod, subDepth2); if (verbose) cerr << " PatersonStockmeyer("<<r<<") returns "<<ret << ", depth="<<subDepth2<<endl; long tmp = simplePolyEval(q, babyStep, mod); // evaluate q subDepth1 = babyStep.getDepth(deg(q)); if (verbose) cerr << " simplePolyEval("<<q<<") returns "<<tmp << ", depth="<<subDepth1<<endl; // multiply by X^{k(n-1)} with minimum depth for (long i=1; i<n; i*=2) { tmp = MulMod(tmp, giantStep.getPower(i), mod); nMults++; subDepth1 = max(subDepth1, giantStep.getDepth(i)) +1; if (verbose) cerr << " after mult by giantStep.getPower("<<i<< ")=" << giantStep.getPower(i)<<" of depth="<< giantStep.getDepth(i) << ", ret="<<tmp<<" and depth is "<<subDepth1<<endl; } totalDepth = max(subDepth1, subDepth2); return AddMod(ret, tmp, mod); // return q * X^{k(n-1)} + r }
// 2x2 polynomial matrix product via FFT.
//
// A = B*C, B and C are destroyed: each entry of B and C is killed right
// after it has been converted to FFT representation.
//
// The FFT size parameter is derived from deg(B(1,1)) + deg(C(1,1)), and the
// same degree bound `da` is used when extracting every entry of A.
// NOTE(review): this presumes the (1,1) entries have the largest degrees --
// confirm against the callers.
void mul(zz_pXMatrix& A, zz_pXMatrix& B, zz_pXMatrix& C)
// A = B*C, B and C are destroyed
{
   long db = deg(B(1,1));
   long dc = deg(C(1,1));
   long da = db + dc;

   long k = NextPowerOfTwo(da+1);

   fftRep B00, B01, B10, B11, C0, C1, T1, T2;

   // transform all of B and the first column of C, freeing each source
   TofftRep(B00, B(0,0), k); B(0,0).kill();
   TofftRep(B01, B(0,1), k); B(0,1).kill();
   TofftRep(B10, B(1,0), k); B(1,0).kill();
   TofftRep(B11, B(1,1), k); B(1,1).kill();

   TofftRep(C0, C(0,0), k); C(0,0).kill();
   TofftRep(C1, C(1,0), k); C(1,0).kill();

   // A(0,0) = B(0,0)*C(0,0) + B(0,1)*C(1,0)
   mul(T1, B00, C0);
   mul(T2, B01, C1);
   add(T1, T1, T2);
   FromfftRep(A(0,0), T1, 0, da);

   // A(1,0) = B(1,0)*C(0,0) + B(1,1)*C(1,0)
   mul(T1, B10, C0);
   mul(T2, B11, C1);
   add(T1, T1, T2);
   FromfftRep(A(1,0), T1, 0, da);

   // reuse C0/C1 for the second column of C
   TofftRep(C0, C(0,1), k); C(0,1).kill();
   TofftRep(C1, C(1,1), k); C(1,1).kill();

   // A(0,1) = B(0,0)*C(0,1) + B(0,1)*C(1,1)
   mul(T1, B00, C0);
   mul(T2, B01, C1);
   add(T1, T1, T2);
   FromfftRep(A(0,1), T1, 0, da);

   // A(1,1) = B(1,0)*C(0,1) + B(1,1)*C(1,1)
   mul(T1, B10, C0);
   mul(T2, B11, C1);
   add(T1, T1, T2);
   FromfftRep(A(1,1), T1, 0, da);
}
bool SFB::Audio::RingBuffer::Allocate(const AudioFormat& format, size_t capacityFrames) { // Only non-interleaved formats are supported if(format.IsInterleaved()) return false; Deallocate(); // Round up to the next power of two capacityFrames = NextPowerOfTwo((uint32_t)capacityFrames); mFormat = format; mCapacityFrames = capacityFrames; mCapacityFramesMask = capacityFrames - 1; size_t capacityBytes = format.FrameCountToByteCount(capacityFrames); // One memory allocation holds everything- first the pointers followed by the deinterleaved channels size_t allocationSize = (capacityBytes + sizeof(uint8_t *)) * format.mChannelsPerFrame; uint8_t *memoryChunk = (uint8_t *)malloc(allocationSize); if(nullptr == memoryChunk) return false; // Zero the entire allocation memset(memoryChunk, 0, allocationSize); // Assign the pointers and channel buffers mBuffers = (uint8_t **)memoryChunk; memoryChunk += format.mChannelsPerFrame * sizeof(uint8_t *); for(UInt32 i = 0; i < format.mChannelsPerFrame; ++i) { mBuffers[i] = memoryChunk; memoryChunk += capacityBytes; } mReadPointer = 0; mWritePointer = 0; return true; }
// Allocates the work and history buffers and starts I/O on both devices.
//
// Returns without doing anything when either device ID is
// kAudioDeviceUnknown, or (after printing an error) when the input and
// output sample rates differ.
void AudioTee::start()
{
  if (mInputDevice.mID == kAudioDeviceUnknown || mOutputDevice.mID == kAudioDeviceUnknown) return;
  if (mInputDevice.mFormat.mSampleRate != mOutputDevice.mFormat.mSampleRate) {
    printf("Error in AudioTee::Start() - sample rate mismatch: %f / %f\n", mInputDevice.mFormat.mSampleRate, mOutputDevice.mFormat.mSampleRate);
    return;
  }

  // Zeroed scratch buffer sized for one input buffer's worth of frames.
  mWorkBuf = new Byte[mInputDevice.mBufferSizeFrames * mInputDevice.mFormat.mBytesPerFrame];
  memset(mWorkBuf, 0, mInputDevice.mBufferSizeFrames * mInputDevice.mFormat.mBytesPerFrame);

  // History ring buffer: sample rate * seconds, passed through NextPowerOfTwo().
  UInt32 framesInHistoryBuffer = NextPowerOfTwo(mInputDevice.mFormat.mSampleRate * mSecondsInHistoryBuffer);
  mHistoryBufferMaxByteSize = mInputDevice.mFormat.mBytesPerFrame * framesInHistoryBuffer;
  mHistBuf = new CARingBuffer();
  // NOTE(review): the channel count is hard-coded to 2 here -- confirm it
  // matches the input format.
  mHistBuf->Allocate(2, mInputDevice.mFormat.mBytesPerFrame, framesInHistoryBuffer);

  // NOTE(review): this message has no trailing newline, labels the sample
  // rate as kHz, and divides by a literal (4 * 2) rather than by
  // mBytesPerFrame -- confirm whether that is intended.
  printf("Initializing history buffer with byte capacity %u — %f seconds at %f kHz", mHistoryBufferMaxByteSize, (mHistoryBufferMaxByteSize / mInputDevice.mFormat.mSampleRate / (4 * 2)), mInputDevice.mFormat.mSampleRate);
  printf("Initializing work buffer with mBufferSizeFrames:%u and mBytesPerFrame %u\n", mInputDevice.mBufferSizeFrames, mInputDevice.mFormat.mBytesPerFrame);

  // Register and start the input IOProc. Return codes are not checked.
  mInputIOProcID = NULL;
  AudioDeviceCreateIOProcID(mInputDevice.mID, InputIOProc, this, &mInputIOProcID);
  AudioDeviceStart(mInputDevice.mID, mInputIOProcID);

  // Register and start the output IOProc. Return codes are not checked.
  mOutputIOProc = OutputIOProc;
  mOutputIOProcID = NULL;
  AudioDeviceCreateIOProcID(mOutputDevice.mID, mOutputIOProc, this, &mOutputIOProcID);
  AudioDeviceStart(mOutputDevice.mID, mOutputIOProcID);
}
// Main entry point: Evaluate an encrypted polynomial on an encrypted input // return in ret = sum_i poly[i] * x^i void polyEval(Ctxt& ret, const Vec<Ctxt>& poly, const Ctxt& x) { if (poly.length()<=1) { // Some special cases if (poly.length()==0) ret.clear(); // empty polynomial else ret = poly[0]; // constant polynomial return; } long deg = poly.length()-1; long logD = NextPowerOfTwo(divc(poly.length(),3)); long d = 1L << logD; // We have d <= deg(poly) < 3d assert(d <= deg && deg < 3*d); Vec<Ctxt> powers(INIT_SIZE, logD+1, x); if (logD>0) { powers[1].square(); for (long i=2; i<=logD; i++) { // powers[i] = x^{2^i} powers[i] = powers[i-1]; powers[i].square(); } } // Compute in three parts p0(X) + ( p1(X) + p2(X)*X^d )*X^d Ctxt tmp(ZeroCtxtLike, ret); recursivePolyEval(ret, &poly[d], min(d,poly.length()-d), powers); // p1(X) if (poly.length() > 2*d) { // p2 is not empty recursivePolyEval(tmp, &poly[2*d], poly.length()-2*d, powers); // p2(X) tmp.multiplyBy(powers[logD]); ret += tmp; } ret.multiplyBy(powers[logD]); // ( p1(X) + p2(X)*X^d )*X^d recursivePolyEval(tmp, &poly[0], d, powers); // p0(X) ret += tmp; }
bool SFB::RingBuffer::Allocate(size_t capacityBytes) { Deallocate(); // Round up to the next power of two capacityBytes = NextPowerOfTwo((uint32_t)capacityBytes); mCapacityBytes = capacityBytes; mCapacityBytesMask = capacityBytes - 1; try { mBuffer = new uint8_t [mCapacityBytes]; } catch(const std::exception& e) { return false; } mReadPointer = 0; mWritePointer = 0; return true; }
void BluesteinInit(long n, const zz_p& root, zz_pX& powers, Vec<mulmod_precon_t>& powers_aux, fftRep& Rb) { long p = zz_p::modulus(); zz_p one; one=1; powers.SetMaxLength(n); SetCoeff(powers,0,one); for (long i=1; i<n; i++) { long iSqr = MulMod(i, i, 2*n); // i^2 mod 2n SetCoeff(powers,i, power(root,iSqr)); // powers[i] = root^{i^2} } // powers_aux tracks powers powers_aux.SetLength(n); for (long i = 0; i < n; i++) powers_aux[i] = PrepMulModPrecon(rep(powers[i]), p); long k = NextPowerOfTwo(2*n-1); long k2 = 1L << k; // k2 = 2^k Rb.SetSize(k); zz_pX b(INIT_SIZE, k2); zz_p rInv = inv(root); SetCoeff(b,n-1,one); // b[n-1] = 1 for (long i=1; i<n; i++) { long iSqr = MulMod(i, i, 2*n); // i^2 mod 2n zz_p bi = power(rInv,iSqr); SetCoeff(b,n-1+i, bi); // b[n-1+i] = b[n-1-i] = root^{-i^2} SetCoeff(b,n-1-i,bi); } TofftRep(Rb, b, k); }