Ejemplo n.º 1
0
/* Routine optimized for unshuffling a buffer for a type size of 4 bytes. */
static void
    unshuffle4_neon(uint8_t * const
dest,
const uint8_t* const src,
const size_t vectorizable_elements,
const size_t total_elements
)
{
size_t i, j, k;
static const size_t bytesoftype = 4;
uint8x16x4_t r0;

for(
i = 0, k = 0;
i<vectorizable_elements*bytesoftype;
i += 64, k++) {
/* load 64 bytes to the structure r0 */
for(
j = 0;
j < 4; j++) {
r0.val[j] =
vld1q_u8(src
+
total_elements* j
+ k*16);
}
/* Store (with permutation) the results in the destination vector */
vst4q_u8(dest
+ k*64, r0);
}
}
Ejemplo n.º 2
0
// gcc 4.6.0 had some trouble (NDK-r9) with this code. We only use it for
// gcc-4.8.x at least.
static void ConvertBGRAToRGBA(const uint32_t* src,
                              int num_pixels, uint8_t* dst) {
  const uint32_t* const end = src + (num_pixels & ~15);
  for (; src < end; src += 16) {
    uint8x16x4_t pixel = vld4q_u8((uint8_t*)src);
    // swap B and R. (VSWP d0,d2 has no intrinsics equivalent!)
    const uint8x16_t tmp = pixel.val[0];
    pixel.val[0] = pixel.val[2];
    pixel.val[2] = tmp;
    vst4q_u8(dst, pixel);
    dst += 64;
  }
  VP8LConvertBGRAToRGBA_C(src, num_pixels & 15, dst);  // left-overs
}
Ejemplo n.º 3
0
void FilterEffect::forceValidPreMultipliedPixels()
{
    // Must operate on pre-multiplied results; other formats cannot have invalid pixels.
    if (!m_premultipliedImageResult)
        return;

    Uint8ClampedArray* imageArray = m_premultipliedImageResult.get();
    unsigned char* pixelData = imageArray->data();
    int pixelArrayLength = imageArray->length();

    // We must have four bytes per pixel, and complete pixels
    ASSERT(!(pixelArrayLength % 4));

#if HAVE(ARM_NEON_INTRINSICS)
    if (pixelArrayLength >= 64) {
        unsigned char* lastPixel = pixelData + (pixelArrayLength & ~0x3f);
        do {
            // Increments pixelData by 64.
            uint8x16x4_t sixteenPixels = vld4q_u8(pixelData);
            sixteenPixels.val[0] = vminq_u8(sixteenPixels.val[0], sixteenPixels.val[3]);
            sixteenPixels.val[1] = vminq_u8(sixteenPixels.val[1], sixteenPixels.val[3]);
            sixteenPixels.val[2] = vminq_u8(sixteenPixels.val[2], sixteenPixels.val[3]);
            vst4q_u8(pixelData, sixteenPixels);
            pixelData += 64;
        } while (pixelData < lastPixel);

        pixelArrayLength &= 0x3f;
        if (!pixelArrayLength)
            return;
    }
#endif

    int numPixels = pixelArrayLength / 4;

    // Iterate over each pixel, checking alpha and adjusting color components if necessary
    while (--numPixels >= 0) {
        // Alpha is the 4th byte in a pixel
        unsigned char a = *(pixelData + 3);
        // Clamp each component to alpha, and increment the pixel location
        for (int i = 0; i < 3; ++i) {
            if (*pixelData > a)
                *pixelData = a;
            ++pixelData;
        }
        // Increment for alpha
        ++pixelData;
    }
}
/* Build an RGBA palette from the RGB and separate alpha palettes. */
void
png_riffle_palette_rgba(png_structrp png_ptr, png_row_infop row_info)
{
   png_const_colorp palette = png_ptr->palette;
   png_bytep riffled_palette = png_ptr->riffled_palette;
   png_const_bytep trans_alpha = png_ptr->trans_alpha;
   int num_trans = png_ptr->num_trans;
   int i;

   /* Initially black, opaque. */
   uint8x16x4_t w = {{
      vdupq_n_u8(0x00),
      vdupq_n_u8(0x00),
      vdupq_n_u8(0x00),
      vdupq_n_u8(0xff),
   }};

   if (row_info->bit_depth != 8)
   {
      png_error(png_ptr, "bit_depth must be 8 for png_riffle_palette_rgba");
      return;
   }

   /* First, riffle the RGB colours into a RGBA palette, the A value is
    * set to opaque for now.
    */
   for (i = 0; i < (1 << row_info->bit_depth); i += 16)
   {
      uint8x16x3_t v = vld3q_u8((png_const_bytep)(palette + i));
      w.val[0] = v.val[0];
      w.val[1] = v.val[1];
      w.val[2] = v.val[2];
      vst4q_u8(riffled_palette + (i << 2), w);
   }

   /* Fix up the missing transparency values. */
   for (i = 0; i < num_trans; i++)
      riffled_palette[(i << 2) + 3] = trans_alpha[i];
}
Ejemplo n.º 5
0
inline void vst4q(u8  * ptr, const uint8x16x4_t  & v) { return vst4q_u8(ptr,  v); }
Ejemplo n.º 6
0
void combineUYVY(const Size2D &size,
                 const u8 * srcyBase, ptrdiff_t srcyStride,
                 const u8 * srcuBase, ptrdiff_t srcuStride,
                 const u8 * srcvBase, ptrdiff_t srcvStride,
                 u8 * dstBase, ptrdiff_t dstStride)
{
    internal::assertSupportedConfiguration();
#ifdef CAROTENE_NEON
#ifndef __ANDROID__
    size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
#endif
    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;

    for (size_t i = 0u; i < size.height; ++i)
    {
        const u8 * srcy = internal::getRowPtr(srcyBase, srcyStride, i);
        const u8 * srcu = internal::getRowPtr(srcuBase, srcuStride, i);
        const u8 * srcv = internal::getRowPtr(srcvBase, srcvStride, i);
        u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
        size_t syj = 0u, sj = 0u, dj = 0u;

#ifndef __ANDROID__
        for (; sj < roiw32; sj += 32, syj += 64, dj += 128)
        {
            internal::prefetch(srcy + syj);
            internal::prefetch(srcu + sj);
            internal::prefetch(srcv + sj);

            uint8x16x2_t v_y = vld2q_u8(srcy + syj);
            uint8x16x4_t v_dst;
            v_dst.val[0] = vld1q_u8(srcu + sj);
            v_dst.val[1] = v_y.val[0];
            v_dst.val[2] = vld1q_u8(srcv + sj);
            v_dst.val[3] = v_y.val[1];
            vst4q_u8(dst + dj, v_dst);

            v_y = vld2q_u8(srcy + syj + 32);
            v_dst.val[0] = vld1q_u8(srcu + sj + 16);
            v_dst.val[1] = v_y.val[0];
            v_dst.val[2] = vld1q_u8(srcv + sj + 16);
            v_dst.val[3] = v_y.val[1];
            vst4q_u8(dst + dj + 64, v_dst);
        }
#endif

        for (; sj < roiw8; sj += 8, syj += 16, dj += 32)
        {
            uint8x8x2_t v_y = vld2_u8(srcy + syj);
            uint8x8x4_t v_dst;
            v_dst.val[0] = vld1_u8(srcu + sj);
            v_dst.val[1] = v_y.val[0];
            v_dst.val[2] = vld1_u8(srcv + sj);
            v_dst.val[3] = v_y.val[1];
            vst4_u8(dst + dj, v_dst);
        }

        for (; sj < size.width; ++sj, syj += 2, dj += 4)
        {
            dst[dj] = srcu[sj];
            dst[dj + 1] = srcy[syj];
            dst[dj + 2] = srcv[sj];
            dst[dj + 3] = srcy[syj + 1];
        }
    }
#else
    (void)size;
    (void)srcyBase;
    (void)srcyStride;
    (void)srcuBase;
    (void)srcuStride;
    (void)srcvBase;
    (void)srcvStride;
    (void)dstBase;
    (void)dstStride;
#endif
}