コード例 #1
0
static WEBP_INLINE uint32_t ClampedAddSubtractFull(const uint32_t* const c0,
                                                   const uint32_t* const c1,
                                                   const uint32_t* const c2) {
  const uint8x8_t p0 = vreinterpret_u8_u64(vcreate_u64(*c0));
  const uint8x8_t p1 = vreinterpret_u8_u64(vcreate_u64(*c1));
  const uint8x8_t p2 = vreinterpret_u8_u64(vcreate_u64(*c2));
  const uint16x8_t sum0 = vaddl_u8(p0, p1);                // add and widen
  const uint16x8_t sum1 = vqsubq_u16(sum0, vmovl_u8(p2));  // widen and subtract
  const uint8x8_t out = vqmovn_u16(sum1);                  // narrow and clamp
  return vget_lane_u32(vreinterpret_u32_u8(out), 0);
}
コード例 #2
0
static WEBP_INLINE uint32_t ClampedAddSubtractFull(const uint32_t* const c0,
        const uint32_t* const c1,
        const uint32_t* const c2) {
    const uint64x1_t C0 = { *c0, 0 }, C1 = { *c1, 0 }, C2 = { *c2, 0 };
    const uint8x8_t p0 = vreinterpret_u8_u64(C0);
    const uint8x8_t p1 = vreinterpret_u8_u64(C1);
    const uint8x8_t p2 = vreinterpret_u8_u64(C2);
    const uint16x8_t sum0 = vaddl_u8(p0, p1);                // add and widen
    const uint16x8_t sum1 = vqsubq_u16(sum0, vmovl_u8(p2));  // widen and subtract
    const uint8x8_t out = vqmovn_u16(sum1);                  // narrow and clamp
    uint32_t ret;
    vst1_lane_u32(&ret, vreinterpret_u32_u8(out), 0);
    return ret;
}
コード例 #3
0
ファイル: mw_neon.c プロジェクト: Daniel-Hwang/eeg
/* u16x8 saturated sub */
void mw_neon_mm_qsub_u16x8(unsigned short * A, int Row, int Col, unsigned short * B, unsigned short * C)
{
	uint16x8_t neon_a, neon_b, neon_c;
	int size = Row * Col;
	int i = 0;
	int k = 0;

	for (i = 8; i <= size ; i+=8)
	{
		k = i - 8;
		neon_a = vld1q_u16(A + k);
		neon_b = vld1q_u16(B + k);
		neon_c = vqsubq_u16(neon_a, neon_b);
		vst1q_u16(C + k, neon_c);
	}

	k = i - 8;
    for (i = 0; i < size % 8; i++)
	{
		C[k + i] = A[k + i] + B[k + i];
	}
}
コード例 #4
0
ファイル: vtransform.hpp プロジェクト: 007Indian/opencv
inline  uint16x8_t vqsubq(const uint16x8_t  & v0, const uint16x8_t  & v1) { return vqsubq_u16(v0, v1); }