static uint8x8_t paeth(uint8x8_t a, uint8x8_t b, uint8x8_t c) { uint8x8_t d, e; uint16x8_t p1, pa, pb, pc; p1 = vaddl_u8(a, b); /* a + b */ pc = vaddl_u8(c, c); /* c * 2 */ pa = vabdl_u8(b, c); /* pa */ pb = vabdl_u8(a, c); /* pb */ pc = vabdq_u16(p1, pc); /* pc */ p1 = vcleq_u16(pa, pb); /* pa <= pb */ pa = vcleq_u16(pa, pc); /* pa <= pc */ pb = vcleq_u16(pb, pc); /* pb <= pc */ p1 = vandq_u16(p1, pa); /* pa <= pb && pa <= pc */ d = vmovn_u16(pb); e = vmovn_u16(p1); d = vbsl_u8(d, b, c); e = vbsl_u8(e, a, d); return e; }
inline uint16x8_t vabdq(const uint16x8_t & v0, const uint16x8_t & v1) { return vabdq_u16(v0, v1); }