Пример #1
0
/* Exercises the vsri_n_u64 intrinsic (shift right and insert, 64-bit lane)
   with a shift immediate of 1.  The operands are never assigned and the
   result is never read, so this presumably serves as a compile-only check
   that the intrinsic is accepted.  */
void test_vsri_nu64 (void)
{
    uint64x1_t insert_target;
    uint64x1_t shift_source;
    uint64x1_t merged;

    merged = vsri_n_u64 (insert_target, shift_source, 1);
}
Пример #2
0
// Wraps vsri_n_u64 with a shift immediate of 1 and returns its result.
// The trailing comments inside the body look like LLVM FileCheck directives
// (expected IR intrinsic name and expected `sri` instruction); they are
// matched by the test harness, so their text and order must stay intact.
uint64x1_t test_vsri_n_u64(uint64x1_t a1, uint64x1_t a2) {
  // CHECK: test_vsri_n_u64
  return vsri_n_u64(a1, a2, 1);
  // CHECK: llvm.arm64.neon.vsri.v1i64
  // CHECK_CODEGEN: sri     d0, d1, #1
}
Пример #3
0
/*
 * Unpack a stream of 11-bit packed samples into 16-bit values.
 *
 * The input is consumed in groups of XN_INPUT_ELEMENT_SIZE bytes (the code
 * reads bytes 0..10 of each group); every group produces 8 output values,
 * each passed through GetOutput(). Trailing input bytes that do not form a
 * complete group are ignored.
 *
 * pcInput    - packed input bytes.
 * pnOutput   - destination; the caller must provide room for 8 values per
 *              complete input group (no size is passed in, so it cannot be
 *              checked here).
 * nInputSize - number of bytes available at pcInput.
 *
 * Returns the number of input bytes consumed.
 *
 * When NEON is defined, an experimental trace of 64-bit/32-bit right shifts
 * of the first 8 bytes of each group is printed for debugging. The trace
 * never writes to pnOutput; the actual decode is always done by the scalar
 * code so that the output is valid in every build configuration.
 */
int  Unpack11to16(const unsigned char* pcInput, unsigned short* pnOutput, const unsigned long  nInputSize)
{
	const unsigned char* pOrigInput = pcInput;
	unsigned long nElements = nInputSize / XN_INPUT_ELEMENT_SIZE; // floored

	// Convert the 11bit packed data into 16bit shorts
	for (unsigned long nElem = 0; nElem < nElements; ++nElem)
	{
		// input:	0,  1,  2,3,  4,  5,  6,7,  8,  9,10
		//		-,---,---,-,---,---,---,-,---,---,-
		// bits:	8,3,5,6,2,8,1,7,4,4,7,1,8,2,6,5,3,8
		//		---,---,-----,---,---,-----,---,---
		// output:	  0,  1,    2,  3,  4,    5,  6,  7
#ifdef NEON
		{
			// NEON state is scoped here so that scalar builds do not
			// need the NEON vector types at all.
			uint8x8_t inputfield;
			uint16x4_t shiftfield;
			uint16_t test[4];

			// Load 64 bits of data
			inputfield = vld1_u8(pcInput);
			// Swap the bytes of each half-word since the endianness is wrong.
			inputfield = vrev16_u8(inputfield);

			// Debug -- let's make sure it looks ok by looking at
			// it as a 16-bit element since that is ultimately what we want
			vst1_u16(test, vreinterpret_u16_u8(inputfield));
			printf("i %04x %04x %04x %04x\n",
				test[0], test[1], test[2], test[3]);

			// Right shift the whole 64-bit lane by 5 bits to align the
			// first half-word. The vector must be reinterpreted explicitly:
			// NEON vector types do not convert implicitly, which is why
			// the untyped call did not compile.
			shiftfield = vreinterpret_u16_u64(
				vshr_n_u64(vreinterpret_u64_u8(inputfield), 5));
			vst1_u16(test, shiftfield);
			printf("1 %04x %04x %04x %04x\n",
				test[0], test[1], test[2], test[3]);

			// Right shift each 32-bit lane by 10 bits to align the
			// second half-word
			shiftfield = vreinterpret_u16_u32(
				vshr_n_u32(vreinterpret_u32_u8(inputfield), 10));
			vst1_u16(test, shiftfield);
			printf("2 %04x %04x %04x %04x\n",
				test[0], test[1], test[2], test[3]);

			// Right shift each 32-bit lane by 15 bits to align the
			// third half-word
			shiftfield = vreinterpret_u16_u32(
				vshr_n_u32(vreinterpret_u32_u8(inputfield), 15));
			vst1_u16(test, shiftfield);
			printf("3 %04x %04x %04x %04x\n",
				test[0], test[1], test[2], test[3]);

			// we would continue for all 8 half-word results
		}
#endif

		// This is the original Primesense code. It runs in every build:
		// the NEON experiment above is incomplete and produces no output.
		pnOutput[0] = GetOutput((XN_TAKE_BITS(pcInput[0],8,0) << 3) | XN_TAKE_BITS(pcInput[1],3,5));
		pnOutput[1] = GetOutput((XN_TAKE_BITS(pcInput[1],5,0) << 6) | XN_TAKE_BITS(pcInput[2],6,2));
		pnOutput[2] = GetOutput((XN_TAKE_BITS(pcInput[2],2,0) << 9) | (XN_TAKE_BITS(pcInput[3],8,0) << 1) | XN_TAKE_BITS(pcInput[4],1,7));
		pnOutput[3] = GetOutput((XN_TAKE_BITS(pcInput[4],7,0) << 4) | XN_TAKE_BITS(pcInput[5],4,4));
		pnOutput[4] = GetOutput((XN_TAKE_BITS(pcInput[5],4,0) << 7) | XN_TAKE_BITS(pcInput[6],7,1));
		pnOutput[5] = GetOutput((XN_TAKE_BITS(pcInput[6],1,0) << 10) | (XN_TAKE_BITS(pcInput[7],8,0) << 2) | XN_TAKE_BITS(pcInput[8],2,6));
		pnOutput[6] = GetOutput((XN_TAKE_BITS(pcInput[8],6,0) << 5) | XN_TAKE_BITS(pcInput[9],5,3));
		pnOutput[7] = GetOutput((XN_TAKE_BITS(pcInput[9],3,0) << 8) | XN_TAKE_BITS(pcInput[10],8,0));

		pcInput += XN_INPUT_ELEMENT_SIZE;
		pnOutput += 8;
	}

	// Pointer difference is ptrdiff_t; narrow explicitly to the declared
	// return type.
	return (int)(pcInput - pOrigInput);
}
Пример #4
0
/* Returns the result of vsri_n_u64 applied to A and B with a shift
   immediate of 9.  */
uint64x1_t
test_vsri_n_u64 (uint64x1_t a, uint64x1_t b)
{
  const uint64x1_t merged = vsri_n_u64 (a, b, 9);

  return merged;
}