void XnUncompressedYUVtoRGBImageProcessor::ProcessFramePacketChunk(const XnSensorProtocolResponseHeader* pHeader, const XnUChar* pData, XnUInt32 nDataOffset, XnUInt32 nDataSize)
{
	XN_PROFILING_START_SECTION("XnUncompressedYUVtoRGBImageProcessor::ProcessFramePacketChunk")

	XnBuffer* pWriteBuffer = GetWriteBuffer();

	if (m_ContinuousBuffer.GetSize() != 0)
	{
		// fill in to a whole element
		XnUInt32 nReadBytes = XN_MIN(nDataSize, XN_YUV_TO_RGB_INPUT_ELEMENT_SIZE - m_ContinuousBuffer.GetSize());
		m_ContinuousBuffer.UnsafeWrite(pData, nReadBytes);
		pData += nReadBytes;
		nDataSize -= nReadBytes;

		if (m_ContinuousBuffer.GetSize() == XN_YUV_TO_RGB_INPUT_ELEMENT_SIZE)
		{
			if (CheckWriteBufferForOverflow(XN_YUV_TO_RGB_OUTPUT_ELEMENT_SIZE))
			{
				// process it
				XnUInt32 nActualRead = 0;
				XnUInt32 nOutputSize = pWriteBuffer->GetFreeSpaceInBuffer();
				YUV422ToRGB888(m_ContinuousBuffer.GetData(), pWriteBuffer->GetUnsafeWritePointer(), XN_YUV_TO_RGB_INPUT_ELEMENT_SIZE, &nActualRead, &nOutputSize);
				pWriteBuffer->UnsafeUpdateSize(XN_YUV_TO_RGB_OUTPUT_ELEMENT_SIZE);
			}

			m_ContinuousBuffer.Reset();
		}
	}

	if (CheckWriteBufferForOverflow(nDataSize / XN_YUV_TO_RGB_INPUT_ELEMENT_SIZE * XN_YUV_TO_RGB_OUTPUT_ELEMENT_SIZE))
	{
		XnUInt32 nActualRead = 0;
		XnUInt32 nOutputSize = pWriteBuffer->GetFreeSpaceInBuffer();
		YUV422ToRGB888(pData, pWriteBuffer->GetUnsafeWritePointer(), nDataSize, &nActualRead, &nOutputSize);
		pWriteBuffer->UnsafeUpdateSize(nOutputSize);
		pData += nActualRead;
		nDataSize -= nActualRead;

		// if we have any bytes left, store them for next packet.
		if (nDataSize > 0)
		{
			// no need to check for overflow. there can not be a case in which more than XN_INPUT_ELEMENT_SIZE
			// are left.
			m_ContinuousBuffer.UnsafeWrite(pData, nDataSize);
		}
	}

	XN_PROFILING_END_SECTION
}
void XnUncompressedDepthProcessor::ProcessFramePacketChunk(const XnSensorProtocolResponseHeader* pHeader, const XnUChar* pData, XnUInt32 nDataOffset, XnUInt32 nDataSize)
{
	XN_PROFILING_START_SECTION("XnUncompressedDepthProcessor::ProcessFramePacketChunk")

	// when depth is uncompressed, we can just copy it directly to write buffer
	XnBuffer* pWriteBuffer = GetWriteBuffer();

	// make sure we have enough room
	if (CheckWriteBufferForOverflow(nDataSize))
	{
		// sometimes, when packets are lost, we get uneven number of bytes, so we need to complete
		// one byte, in order to keep UINT16 alignment
		if (nDataSize % 2 != 0)
		{
			nDataSize--;
			pData++;
		}

		// copy values. Make sure we do not get corrupted shifts
		XnUInt16* pRaw = (XnUInt16*)(pData);
		XnUInt16* pRawEnd = (XnUInt16*)(pData + nDataSize);
		XnDepthPixel* pWriteBuf = (XnDepthPixel*)pWriteBuffer->GetUnsafeWritePointer();

		while (pRaw < pRawEnd)
		{
			*pWriteBuf = GetOutput(XN_MIN(*pRaw, XN_DEVICE_SENSOR_MAX_SHIFT_VALUE-1));
			++pRaw;
			++pWriteBuf;
		}

 		pWriteBuffer->UnsafeUpdateSize(nDataSize);
	}

	XN_PROFILING_END_SECTION
}
void XnPassThroughImageProcessor::ProcessFramePacketChunk(const XnSensorProtocolResponseHeader* /*pHeader*/, const XnUChar* pData, XnUInt32 /*nDataOffset*/, XnUInt32 nDataSize)
{
	XN_PROFILING_START_SECTION("XnUncompressedYUVImageProcessor::ProcessFramePacketChunk")

	// when image is uncompressed, we can just copy it directly to write buffer
	XnBuffer* pWriteBuffer = GetWriteBuffer();

	// make sure we have enough room
	if (CheckWriteBufferForOverflow(nDataSize))
	{
		pWriteBuffer->UnsafeWrite(pData, nDataSize);
	}

	XN_PROFILING_END_SECTION
}
void XnUncompressedBayerProcessor::ProcessFramePacketChunk(const XnSensorProtocolResponseHeader* pHeader, const XnUChar* pData, XnUInt32 nDataOffset, XnUInt32 nDataSize)
{
	XN_PROFILING_START_SECTION("XnUncompressedBayerProcessor::ProcessFramePacketChunk")

	// if output format is Gray8, we can write directly to output buffer. otherwise, we need
	// to write to a temp buffer.
	XnBuffer* pWriteBuffer = (GetStream()->GetOutputFormat() == XN_OUTPUT_FORMAT_GRAYSCALE8) ? GetWriteBuffer() : &m_UncompressedBayerBuffer;

	// make sure we have enough room
	if (CheckWriteBufferForOverflow(nDataSize))
	{
		pWriteBuffer->UnsafeWrite(pData, nDataSize);
	}

	XN_PROFILING_END_SECTION
}
XnStatus XnPacked11DepthProcessor::Unpack11to16(const XnUInt8* pcInput, const XnUInt32 nInputSize, XnUInt32* pnActualRead)
{
	const XnUInt8* pOrigInput = pcInput;

	XnUInt32 nElements = nInputSize / XN_INPUT_ELEMENT_SIZE; // floored
	XnUInt32 nNeededOutput = nElements * XN_OUTPUT_ELEMENT_SIZE;

	*pnActualRead = 0;
	XnBuffer* pWriteBuffer = GetWriteBuffer();

	if (!CheckWriteBufferForOverflow(nNeededOutput))
	{
		return XN_STATUS_OUTPUT_BUFFER_OVERFLOW;
	}

	XnUInt16* pnOutput = (XnUInt16*)pWriteBuffer->GetUnsafeWritePointer();

	// Convert the 11bit packed data into 16bit shorts
	for (XnUInt32 nElem = 0; nElem < nElements; ++nElem)
	{
		// input:	0,  1,  2,3,  4,  5,  6,7,  8,  9,10
		//			-,---,---,-,---,---,---,-,---,---,-
		// bits:	8,3,5,6,2,8,1,7,4,4,7,1,8,2,6,5,3,8
		//			---,---,-----,---,---,-----,---,---
		// output:	  0,  1,    2,  3,  4,    5,  6,  7

		pnOutput[0] = GetOutput((XN_TAKE_BITS(pcInput[0],8,0) << 3) | XN_TAKE_BITS(pcInput[1],3,5));
		pnOutput[1] = GetOutput((XN_TAKE_BITS(pcInput[1],5,0) << 6) | XN_TAKE_BITS(pcInput[2],6,2));
		pnOutput[2] = GetOutput((XN_TAKE_BITS(pcInput[2],2,0) << 9) | (XN_TAKE_BITS(pcInput[3],8,0) << 1) | XN_TAKE_BITS(pcInput[4],1,7));
		pnOutput[3] = GetOutput((XN_TAKE_BITS(pcInput[4],7,0) << 4) | XN_TAKE_BITS(pcInput[5],4,4));
		pnOutput[4] = GetOutput((XN_TAKE_BITS(pcInput[5],4,0) << 7) | XN_TAKE_BITS(pcInput[6],7,1));
		pnOutput[5] = GetOutput((XN_TAKE_BITS(pcInput[6],1,0) << 10) | (XN_TAKE_BITS(pcInput[7],8,0) << 2) | XN_TAKE_BITS(pcInput[8],2,6));
		pnOutput[6] = GetOutput((XN_TAKE_BITS(pcInput[8],6,0) << 5) | XN_TAKE_BITS(pcInput[9],5,3));
		pnOutput[7] = GetOutput((XN_TAKE_BITS(pcInput[9],3,0) << 8) | XN_TAKE_BITS(pcInput[10],8,0));

		pcInput += XN_INPUT_ELEMENT_SIZE;
		pnOutput += 8;
	}

	*pnActualRead = (XnUInt32)(pcInput - pOrigInput);
	pWriteBuffer->UnsafeUpdateSize(nNeededOutput);

	return XN_STATUS_OK;
}
XnStatus XnPacked11DepthProcessor::Unpack11to16(const XnUInt8* pcInput, const XnUInt32 nInputSize, XnUInt32* pnActualRead)
{
	const XnUInt8* pOrigInput = pcInput;

	XnUInt32 nElements = nInputSize / XN_INPUT_ELEMENT_SIZE; // floored
	XnUInt32 nNeededOutput = nElements * XN_OUTPUT_ELEMENT_SIZE;

	*pnActualRead = 0;
	XnBuffer* pWriteBuffer = GetWriteBuffer();

	// Check there is enough room for the depth pixels
	if (!CheckWriteBufferForOverflow(nNeededOutput))
	{
		return XN_STATUS_OUTPUT_BUFFER_OVERFLOW;
	}

	XnUInt16* pnOutput = (XnUInt16*)pWriteBuffer->GetUnsafeWritePointer();

	XnUInt16 a0,a1,a2,a3,a4,a5,a6,a7;
#ifdef XN_NEON
	XnUInt16 depth[8];
	uint16x8_t Q0;
#endif

	// Convert the 11bit packed data into 16bit shorts
	for (XnUInt32 nElem = 0; nElem < nElements; ++nElem)
	{
    if(m_nScaleFactor > 1)
    {
      XnUInt32 px = m_nOffsetInFrame%m_CurrentVideoMode.resolutionX;
      XnUInt32 py = (m_nOffsetInFrame)/m_CurrentVideoMode.resolutionX;

      if(py%m_nScaleFactor != 0)
      {
        // Skip as many pixels as possible
        XnUInt32 nEltsToSkip =
            XN_MIN(nElements - nElem,
                   (m_CurrentVideoMode.resolutionX - px)/8
                   + (m_nScaleFactor-(py%m_nScaleFactor) - 1)*m_CurrentVideoMode.resolutionX/8);

        //      ::memset(pnOutput, 0, nEltsToSkip*8*sizeof(XnUInt16));
        pcInput += nEltsToSkip*XN_INPUT_ELEMENT_SIZE;
        pnOutput += nEltsToSkip*8;
        m_nOffsetInFrame += nEltsToSkip*8;
        nElem += (nEltsToSkip-1);
        continue;
      }
    }

    // input:  0,  1,  2,3,  4,  5,  6,7,  8,  9,10
    //         -,---,---,-,---,---,---,-,---,---,-
    // bits:   8,3,5,6,2,8,1,7,4,4,7,1,8,2,6,5,3,8
    //         ---,---,-----,---,---,-----,---,---
    // output:   0,  1,    2,  3,  4,    5,  6,  7
    if(m_nScaleFactor == 2)
    {
      a0 = (XN_TAKE_BITS(pcInput[0],8,0) << 3) | XN_TAKE_BITS(pcInput[1],3,5);
      a2 = (XN_TAKE_BITS(pcInput[2],2,0) << 9) | (XN_TAKE_BITS(pcInput[3],8,0) << 1) | XN_TAKE_BITS(pcInput[4],1,7);
      a4 = (XN_TAKE_BITS(pcInput[5],4,0) << 7) | XN_TAKE_BITS(pcInput[6],7,1);
      a6 = (XN_TAKE_BITS(pcInput[8],6,0) << 5) | XN_TAKE_BITS(pcInput[9],5,3);
    }
    else if(m_nScaleFactor == 4)
    {
      a0 = (XN_TAKE_BITS(pcInput[0],8,0) << 3) | XN_TAKE_BITS(pcInput[1],3,5);
      a4 = (XN_TAKE_BITS(pcInput[5],4,0) << 7) | XN_TAKE_BITS(pcInput[6],7,1);
    }
    else
    {
      a0 = (XN_TAKE_BITS(pcInput[0],8,0) << 3) | XN_TAKE_BITS(pcInput[1],3,5);
      a1 = (XN_TAKE_BITS(pcInput[1],5,0) << 6) | XN_TAKE_BITS(pcInput[2],6,2);
      a2 = (XN_TAKE_BITS(pcInput[2],2,0) << 9) | (XN_TAKE_BITS(pcInput[3],8,0) << 1) | XN_TAKE_BITS(pcInput[4],1,7);
      a3 = (XN_TAKE_BITS(pcInput[4],7,0) << 4) | XN_TAKE_BITS(pcInput[5],4,4);
      a4 = (XN_TAKE_BITS(pcInput[5],4,0) << 7) | XN_TAKE_BITS(pcInput[6],7,1);
      a5 = (XN_TAKE_BITS(pcInput[6],1,0) << 10) | (XN_TAKE_BITS(pcInput[7],8,0) << 2) | XN_TAKE_BITS(pcInput[8],2,6);
      a6 = (XN_TAKE_BITS(pcInput[8],6,0) << 5) | XN_TAKE_BITS(pcInput[9],5,3);
      a7 = (XN_TAKE_BITS(pcInput[9],3,0) << 8) | XN_TAKE_BITS(pcInput[10],8,0);
    }



#ifdef XN_NEON
		depth[0] = GetOutput(a0);
		depth[1] = GetOutput(a1);
		depth[2] = GetOutput(a2);
		depth[3] = GetOutput(a3);
		depth[4] = GetOutput(a4);
		depth[5] = GetOutput(a5);
		depth[6] = GetOutput(a6);
		depth[7] = GetOutput(a7);

		// Load
		Q0 = vld1q_u16(depth);
		// Store
		vst1q_u16(pnOutput, Q0);
#else
    if(m_nScaleFactor == 2)
    {
      *pnOutput++ = GetOutput(a0);
      *pnOutput++ = 0;
      *pnOutput++ = GetOutput(a2);
      *pnOutput++ = 0;
      *pnOutput++ = GetOutput(a4);
      *pnOutput++ = 0;
      *pnOutput++ = GetOutput(a6);
      *pnOutput++ = 0;
    }
    else if(m_nScaleFactor == 4)
    {
      *pnOutput++ = GetOutput(a0);
      *pnOutput++ = 0;
      *pnOutput++ = 0;
      *pnOutput++ = 0;
      *pnOutput++ = GetOutput(a4);
      *pnOutput++ = 0;
      *pnOutput++ = 0;
      *pnOutput++ = 0;
    }
    else
    {
      *pnOutput++ = GetOutput(a0);
      *pnOutput++ = GetOutput(a1);
      *pnOutput++ = GetOutput(a2);
      *pnOutput++ = GetOutput(a3);
      *pnOutput++ = GetOutput(a4);
      *pnOutput++ = GetOutput(a5);
      *pnOutput++ = GetOutput(a6);
      *pnOutput++ = GetOutput(a7);
    }
#endif

		pcInput += XN_INPUT_ELEMENT_SIZE;
    m_nOffsetInFrame+=8;
	}

	*pnActualRead = (XnUInt32)(pcInput - pOrigInput);
	pWriteBuffer->UnsafeUpdateSize(nNeededOutput);

	return XN_STATUS_OK;
}