// Portable (non-NEON) implementation of dmz_deinterleave_RGBA_to_R.
// Copies the first byte of each 4-byte source pixel into consecutive dest bytes.
// Works for any pixel count; a size <= 0 copies nothing.
static void dmz_deinterleave_RGBA_to_R_scalar(const uint8_t *source, uint8_t *dest, int size) {
  int remaining = size;

  // Main loop, unrolled eight pixels per iteration.
  while (remaining >= 8) {
    dest[0] = source[0 * 4];
    dest[1] = source[1 * 4];
    dest[2] = source[2 * 4];
    dest[3] = source[3 * 4];
    dest[4] = source[4 * 4];
    dest[5] = source[5 * 4];
    dest[6] = source[6 * 4];
    dest[7] = source[7 * 4];
    source += 8 * 4;
    dest += 8;
    remaining -= 8;
  }

  // Tail: the 0..7 leftover pixels are copied one at a time. `size` is a pixel
  // count and need not be a multiple of 4, so copying leftovers in groups of
  // four would read past `source` and write past `dest`.
  while (remaining > 0) {
    *dest = *source;
    source += 4;
    dest += 1;
    remaining -= 1;
  }
}

// Extracts the first channel (R, per the function name) from an interleaved
// 4-bytes-per-pixel buffer.
//
//   source: interleaved input; 4 * size bytes are read.
//   dest:   output; exactly size bytes are written (dest[i] = source[4 * i]).
//   size:   number of pixels. Values <= 0 are a no-op.
//
// When NEON is compiled in and available at runtime, inputs of at least 16
// pixels use the vectorized path; everything else uses the scalar path.
void dmz_deinterleave_RGBA_to_R(uint8_t *source, uint8_t *dest, int size) {
#if DMZ_HAS_NEON_COMPILETIME
  // The overlapping tail below re-reads the final 16 pixels, so the NEON path
  // is only valid when at least one full 16-pixel vector fits in the input.
  // Smaller inputs fall through to the scalar code instead of indexing at a
  // negative offset.
  if (size >= 16 && dmz_has_neon_runtime()) {
    for (int offset = 0; offset + 15 < size; offset += 16) {
      uint8x16x4_t r1 = vld4q_u8(&source[offset * 4]);
      vst1q_u8(&dest[offset], r1.val[0]);
    }

    // use "overlapping" to process the remaining bytes
    // See http://community.arm.com/groups/processors/blog/2010/05/10/coding-for-neon--part-2-dealing-with-leftovers
    if (size % 16 > 0) {
      int offset = size - 16;
      uint8x16x4_t r1 = vld4q_u8(&source[offset * 4]);
      vst1q_u8(&dest[offset], r1.val[0]);
    }
    return;
  }
#endif

  dmz_deinterleave_RGBA_to_R_scalar(source, dest, size);
}
// Native function named per the JNI convention for
// io.card.payment.CardScanner.nUseNeon(). Returns the value of
// dmz_has_neon_runtime() converted to jboolean.
extern "C" jboolean JNICALL Java_io_card_payment_CardScanner_nUseNeon() {
  const jboolean neon_supported = dmz_has_neon_runtime();
  return neon_supported;
}