/**
 * Counts how many leading 8-byte slices are identical in two strings.
 * @param[in] left aligned string pointer. can be either big-endian or little endian.
 * @param[in] right aligned string pointer must be in same endian as left.
 * @param[in] max_slices min(left_len / 8, right_len / 8)
 * @return number of shared slices (equals max_slices when no slice differs)
 * @ingroup MASSTREE
 */
inline uint16_t count_common_slices(const void* left, const void* right, uint16_t max_slices) {
  // Both inputs are read as arrays of 64-bit words; the macro is handed an alignment of 8.
  const uint64_t* lhs_words = reinterpret_cast<const uint64_t*>(ASSUME_ALIGNED(left, 8));
  const uint64_t* rhs_words = reinterpret_cast<const uint64_t*>(ASSUME_ALIGNED(right, 8));

  // Advance while we are within bounds and the current pair of words matches.
  uint16_t matched = 0;
  while (matched < max_slices && lhs_words[matched] == rhs_words[matched]) {
    ++matched;
  }
  return matched;
}
// @return ratio between max/min time required to access one node's // memory from each processor. static double MeasureRelativeDistance() { const size_t size = 32*MiB; void* mem = vm::Allocate(size); ASSUME_ALIGNED(mem, pageSize); const uintptr_t previousProcessorMask = os_cpu_SetThreadAffinityMask(os_cpu_ProcessorMask()); double minTime = 1e10, maxTime = 0.0; for(size_t node = 0; node < numa_NumNodes(); node++) { const uintptr_t processorMask = numa_ProcessorMaskFromNode(node); os_cpu_SetThreadAffinityMask(processorMask); const double startTime = timer_Time(); memset(mem, 0, size); const double elapsedTime = timer_Time() - startTime; minTime = std::min(minTime, elapsedTime); maxTime = std::max(maxTime, elapsedTime); } (void)os_cpu_SetThreadAffinityMask(previousProcessorMask); vm::Free(mem, size); return maxTime / minTime; }
#include "alu.h" #include "defs.h" static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*RESTRICT Values)[2], const ALsizei irSize, const ALfloat (*RESTRICT Coeffs)[2], ALfloat left, ALfloat right); void MixHrtf(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut, const ALfloat *data, ALsizei Offset, ALsizei OutPos, const ALsizei IrSize, MixHrtfParams *hrtfparams, HrtfState *hrtfstate, ALsizei BufferSize) { const ALfloat (*Coeffs)[2] = ASSUME_ALIGNED(hrtfparams->Coeffs, 16); const ALsizei Delay[2] = { hrtfparams->Delay[0], hrtfparams->Delay[1] }; const ALfloat gainstep = hrtfparams->GainStep; const ALfloat gain = hrtfparams->Gain; ALfloat g, stepcount = 0.0f; ALfloat left, right; ALsizei i; ASSUME(IrSize >= 4); ASSUME(BufferSize > 0); LeftOut += OutPos; RightOut += OutPos; for(i = 0;i < BufferSize;i++) { hrtfstate->History[Offset&HRTF_HISTORY_MASK] = *(data++);
// Normalized 8x8 DCT void dct8x8s(float a[64]) { ASSUME_ALIGNED(a); float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; float xr, xi; for (unsigned j = 0; j <= 7; j++) { x0r = a[0*8+j] + a[7*8+j]; x1r = a[0*8+j] - a[7*8+j]; x0i = a[2*8+j] + a[5*8+j]; x1i = a[2*8+j] - a[5*8+j]; x2r = a[4*8+j] + a[3*8+j]; x3r = a[4*8+j] - a[3*8+j]; x2i = a[6*8+j] + a[1*8+j]; x3i = a[6*8+j] - a[1*8+j]; xr = x0r + x2r; xi = x0i + x2i; a[0*8+j] = C8_4R * (xr + xi); a[4*8+j] = C8_4R * (xr - xi); xr = x0r - x2r; xi = x0i - x2i; a[2*8+j] = C8_2R * xr - C8_2I * xi; a[6*8+j] = C8_2R * xi + C8_2I * xr; xr = W8_4R * (x1i - x3i); x1i = W8_4R * (x1i + x3i); x3i = x1i - x3r; x1i += x3r; x3r = x1r - xr; x1r += xr; a[1*8+j] = C8_1R * x1r - C8_1I * x1i; a[7*8+j] = C8_1R * x1i + C8_1I * x1r; a[3*8+j] = C8_3R * x3r - C8_3I * x3i; a[5*8+j] = C8_3R * x3i + C8_3I * x3r; } for (unsigned j = 0; j <= 7; j++) { x0r = a[j*8+0] + a[j*8+7]; x1r = a[j*8+0] - a[j*8+7]; x0i = a[j*8+2] + a[j*8+5]; x1i = a[j*8+2] - a[j*8+5]; x2r = a[j*8+4] + a[j*8+3]; x3r = a[j*8+4] - a[j*8+3]; x2i = a[j*8+6] + a[j*8+1]; x3i = a[j*8+6] - a[j*8+1]; xr = x0r + x2r; xi = x0i + x2i; a[j*8+0] = C8_4R * (xr + xi); a[j*8+4] = C8_4R * (xr - xi); xr = x0r - x2r; xi = x0i - x2i; a[j*8+2] = C8_2R * xr - C8_2I * xi; a[j*8+6] = C8_2R * xi + C8_2I * xr; xr = W8_4R * (x1i - x3i); x1i = W8_4R * (x1i + x3i); x3i = x1i - x3r; x1i += x3r; x3r = x1r - xr; x1r += xr; a[j*8+1] = C8_1R * x1r - C8_1I * x1i; a[j*8+7] = C8_1R * x1i + C8_1I * x1r; a[j*8+3] = C8_3R * x3r - C8_3I * x3i; a[j*8+5] = C8_3R * x3i + C8_3I * x3r; } }
// Normalized 8x8 IDCT void idct8x8s(float a[64]) { ASSUME_ALIGNED(a); float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; float xr, xi; for (unsigned j = 0; j <= 7; j++) { x1r = C8_1R * a[1*8+j] + C8_1I * a[7*8+j]; x1i = C8_1R * a[7*8+j] - C8_1I * a[1*8+j]; x3r = C8_3R * a[3*8+j] + C8_3I * a[5*8+j]; x3i = C8_3R * a[5*8+j] - C8_3I * a[3*8+j]; xr = x1r - x3r; xi = x1i + x3i; x1r += x3r; x3i -= x1i; x1i = W8_4R * (xr + xi); x3r = W8_4R * (xr - xi); xr = C8_2R * a[2*8+j] + C8_2I * a[6*8+j]; xi = C8_2R * a[6*8+j] - C8_2I * a[2*8+j]; x0r = C8_4R * (a[0*8+j] + a[4*8+j]); x0i = C8_4R * (a[0*8+j] - a[4*8+j]); x2r = x0r - xr; x2i = x0i - xi; x0r += xr; x0i += xi; a[0*8+j] = x0r + x1r; a[7*8+j] = x0r - x1r; a[2*8+j] = x0i + x1i; a[5*8+j] = x0i - x1i; a[4*8+j] = x2r - x3i; a[3*8+j] = x2r + x3i; a[6*8+j] = x2i - x3r; a[1*8+j] = x2i + x3r; } for (unsigned j = 0; j <= 7; j++) { x1r = C8_1R * a[j*8+1] + C8_1I * a[j*8+7]; x1i = C8_1R * a[j*8+7] - C8_1I * a[j*8+1]; x3r = C8_3R * a[j*8+3] + C8_3I * a[j*8+5]; x3i = C8_3R * a[j*8+5] - C8_3I * a[j*8+3]; xr = x1r - x3r; xi = x1i + x3i; x1r += x3r; x3i -= x1i; x1i = W8_4R * (xr + xi); x3r = W8_4R * (xr - xi); xr = C8_2R * a[j*8+2] + C8_2I * a[j*8+6]; xi = C8_2R * a[j*8+6] - C8_2I * a[j*8+2]; x0r = C8_4R * (a[j*8+0] + a[j*8+4]); x0i = C8_4R * (a[j*8+0] - a[j*8+4]); x2r = x0r - xr; x2i = x0i - xi; x0r += xr; x0i += xi; a[j*8+0] = x0r + x1r; a[j*8+7] = x0r - x1r; a[j*8+2] = x0i + x1i; a[j*8+5] = x0i - x1i; a[j*8+4] = x2r - x3i; a[j*8+3] = x2r + x3i; a[j*8+6] = x2i - x3r; a[j*8+1] = x2i + x3r; } }