C++ (Cpp) ASSUME_ALIGNED Examples

Example #1

0

Show file

File: masstree_id.hpp Project: vanwaals/foedus_code

/**
 * Counts how many leading 8-byte slices are identical in two strings.
 * @param[in] left pointer to the first string; treated as 8-byte aligned.
 * May be big-endian or little-endian.
 * @param[in] right pointer to the second string; treated as 8-byte aligned and
 * must use the same endianness as left.
 * @param[in] max_slices upper bound on the slices compared, i.e. min(left_len / 8, right_len / 8)
 * @return index of the first slice that differs, or max_slices when none differ
 * @ingroup MASSTREE
 */
inline uint16_t count_common_slices(const void* left, const void* right, uint16_t max_slices) {
  const uint64_t* const lhs = reinterpret_cast<const uint64_t*>(ASSUME_ALIGNED(left, 8));
  const uint64_t* const rhs = reinterpret_cast<const uint64_t*>(ASSUME_ALIGNED(right, 8));
  // Walk forward while the slices agree; the counter doubles as the result.
  uint16_t matched = 0;
  while (matched < max_slices && lhs[matched] == rhs[matched]) {
    ++matched;
  }
  return matched;
}

Example #2

0

Show file

File: wnuma.cpp Project: righnatios/0ad

// Times a memset over one 32 MiB allocation once per NUMA node, with the
// thread's affinity set to that node's processor mask each time.
// @return slowest elapsed time divided by the fastest elapsed time.
static double MeasureRelativeDistance()
{
	const size_t bufferSize = 32*MiB;
	void* buffer = vm::Allocate(bufferSize);
	ASSUME_ALIGNED(buffer, pageSize);

	// The value returned here is handed back to os_cpu_SetThreadAffinityMask
	// at the end (presumably restoring the caller's affinity - confirm).
	const uintptr_t previousProcessorMask = os_cpu_SetThreadAffinityMask(os_cpu_ProcessorMask());

	double fastest = 1e10;
	double slowest = 0.0;
	size_t node = 0;
	while(node < numa_NumNodes())
	{
		const uintptr_t nodeMask = numa_ProcessorMaskFromNode(node);
		os_cpu_SetThreadAffinityMask(nodeMask);

		const double begin = timer_Time();
		memset(buffer, 0, bufferSize);
		const double duration = timer_Time() - begin;

		// track the extremes across all nodes
		if(duration < fastest)
			fastest = duration;
		if(slowest < duration)
			slowest = duration;

		++node;
	}

	(void)os_cpu_SetThreadAffinityMask(previousProcessorMask);

	vm::Free(buffer, bufferSize);

	return slowest / fastest;
}

Example #3

0

Show file

File: hrtf_inc.c Project: xxxbxxx/openal-soft

#include "alu.h"
#include "defs.h"


/* Forward declaration only: no definition of ApplyCoeffs is visible in this
 * part of the file.
 * NOTE(review): presumably each file that includes this one supplies its own
 * definition - confirm against the including sources. */
static inline void ApplyCoeffs(ALsizei Offset, ALfloat (*RESTRICT Values)[2],
                               const ALsizei irSize,
                               const ALfloat (*RESTRICT Coeffs)[2],
                               ALfloat left, ALfloat right);


void MixHrtf(ALfloat *RESTRICT LeftOut, ALfloat *RESTRICT RightOut,
             const ALfloat *data, ALsizei Offset, ALsizei OutPos,
             const ALsizei IrSize, MixHrtfParams *hrtfparams, HrtfState *hrtfstate,
             ALsizei BufferSize)
{
    const ALfloat (*Coeffs)[2] = ASSUME_ALIGNED(hrtfparams->Coeffs, 16);
    const ALsizei Delay[2] = { hrtfparams->Delay[0], hrtfparams->Delay[1] };
    const ALfloat gainstep = hrtfparams->GainStep;
    const ALfloat gain = hrtfparams->Gain;
    ALfloat g, stepcount = 0.0f;
    ALfloat left, right;
    ALsizei i;

    ASSUME(IrSize >= 4);
    ASSUME(BufferSize > 0);

    LeftOut  += OutPos;
    RightOut += OutPos;
    for(i = 0;i < BufferSize;i++)
    {
        hrtfstate->History[Offset&HRTF_HISTORY_MASK] = *(data++);

Example #4

0

Show file

File: dct.c Project: ilyatikhonov/jpeg2png

// One in-place 8-point forward pass over v[0], v[step], ..., v[7*step].
// All eight inputs are read before any output is stored.
static inline void dct8_lane(float *v, unsigned step) {
        const float in0 = v[0], in1 = v[step], in2 = v[2*step], in3 = v[3*step];
        const float in4 = v[4*step], in5 = v[5*step], in6 = v[6*step], in7 = v[7*step];

        // Pairwise sums and differences of (0,7), (2,5), (4,3), (6,1).
        const float sum07 = in0 + in7;
        const float dif07 = in0 - in7;
        const float sum25 = in2 + in5;
        const float dif25 = in2 - in5;
        const float sum43 = in4 + in3;
        const float dif43 = in4 - in3;
        const float sum61 = in6 + in1;
        const float dif61 = in6 - in1;

        // Outputs 0 and 4 come from the sums of the sums.
        const float even_r = sum07 + sum43;
        const float even_i = sum25 + sum61;
        v[0] = C8_4R * (even_r + even_i);
        v[4*step] = C8_4R * (even_r - even_i);

        // Outputs 2 and 6 come from the differences of the sums.
        const float mid_r = sum07 - sum43;
        const float mid_i = sum25 - sum61;
        v[2*step] = C8_2R * mid_r - C8_2I * mid_i;
        v[6*step] = C8_2R * mid_i + C8_2I * mid_r;

        // Odd outputs 1, 7, 3, 5 come from the pairwise differences.
        const float scaled_dif = W8_4R * (dif25 - dif61);
        const float scaled_sum = W8_4R * (dif25 + dif61);
        const float lo_i = scaled_sum - dif43;
        const float hi_i = scaled_sum + dif43;
        const float lo_r = dif07 - scaled_dif;
        const float hi_r = dif07 + scaled_dif;
        v[step] = C8_1R * hi_r - C8_1I * hi_i;
        v[7*step] = C8_1R * hi_i + C8_1I * hi_r;
        v[3*step] = C8_3R * lo_r - C8_3I * lo_i;
        v[5*step] = C8_3R * lo_i + C8_3I * lo_r;
}

// Normalized 8x8 DCT, computed in place on the 64 floats of a.
// The 8-point pass runs first over the eight stride-8 lanes (a[j], a[j+8], ...)
// and then over the eight runs of 8 consecutive elements (a[8*j .. 8*j+7]).
void dct8x8s(float a[64]) {
        ASSUME_ALIGNED(a);

        for (unsigned col = 0; col < 8; col++) {
                dct8_lane(a + col, 8);
        }
        for (unsigned row = 0; row < 8; row++) {
                dct8_lane(a + row*8, 1);
        }
}

Example #5

0

Show file

File: dct.c Project: ilyatikhonov/jpeg2png

// One in-place 8-point inverse pass over v[0], v[step], ..., v[7*step].
// All eight inputs are read before any output is stored.
static inline void idct8_lane(float *v, unsigned step) {
        const float in0 = v[0], in1 = v[step], in2 = v[2*step], in3 = v[3*step];
        const float in4 = v[4*step], in5 = v[5*step], in6 = v[6*step], in7 = v[7*step];

        // Odd inputs: combine the (1,7) and (3,5) pairs.
        const float p17_r = C8_1R * in1 + C8_1I * in7;
        const float p17_i = C8_1R * in7 - C8_1I * in1;
        const float p35_r = C8_3R * in3 + C8_3I * in5;
        const float p35_i = C8_3R * in5 - C8_3I * in3;
        const float dif_r = p17_r - p35_r;
        const float sum_i = p17_i + p35_i;
        const float odd_a = p17_r + p35_r;
        const float odd_b = p35_i - p17_i;
        const float odd_c = W8_4R * (dif_r + sum_i);
        const float odd_d = W8_4R * (dif_r - sum_i);

        // Even inputs: the (2,6) pair and the (0,4) pair.
        const float p26_r = C8_2R * in2 + C8_2I * in6;
        const float p26_i = C8_2R * in6 - C8_2I * in2;
        const float p04_s = C8_4R * (in0 + in4);
        const float p04_d = C8_4R * (in0 - in4);
        const float even_c = p04_s - p26_r;
        const float even_d = p04_d - p26_i;
        const float even_a = p04_s + p26_r;
        const float even_b = p04_d + p26_i;

        // Final butterflies: each even term pairs with one odd term.
        v[0] = even_a + odd_a;
        v[7*step] = even_a - odd_a;
        v[2*step] = even_b + odd_c;
        v[5*step] = even_b - odd_c;
        v[4*step] = even_c - odd_b;
        v[3*step] = even_c + odd_b;
        v[6*step] = even_d - odd_d;
        v[step] = even_d + odd_d;
}

// Normalized 8x8 IDCT, computed in place on the 64 floats of a.
// The 8-point pass runs first over the eight stride-8 lanes (a[j], a[j+8], ...)
// and then over the eight runs of 8 consecutive elements (a[8*j .. 8*j+7]).
void idct8x8s(float a[64]) {
        ASSUME_ALIGNED(a);

        for (unsigned col = 0; col < 8; col++) {
                idct8_lane(a + col, 8);
        }
        for (unsigned row = 0; row < 8; row++) {
                idct8_lane(a + row*8, 1);
        }
}