Пример #1
1
/*
 * Compute the minimum of each of three byte streams interleaved in `a`.
 *
 * a            - n bytes laid out as line0, line1, line2, line0, line1, ...
 * n            - number of bytes readable from `a` (any value, including 0).
 * line_results - receives three bytes: the minimum seen for line 0, 1 and 2.
 *                A line with no elements is reported as 255.
 *
 * Whole 48-byte groups are reduced with NEON; any remaining bytes are folded
 * in one at a time so the function never reads past a[n - 1].
 *
 * NOTE(review): ar_min_u8 is defined outside this block; it is used here
 * exactly as before, presumably returning the smaller of its two arguments.
 */
void ar_stride3_vminall_u8_neon(const uint8_t* a,
                                uint32_t n,
                                uint8_t* line_results)
{
   uint8x16_t line0_min = vdupq_n_u8(255);
   uint8x16_t line1_min = vdupq_n_u8(255);
   uint8x16_t line2_min = vdupq_n_u8(255);

   uint8_t line0_array[16];
   uint8_t line1_array[16];
   uint8_t line2_array[16];

   /* vld3q_u8 always loads 48 bytes, so only complete 48-byte groups may be
    * handed to it.  vector_end is a multiple of 48 (and therefore of 3) that
    * never exceeds n, so `i += 48` lands exactly on it and cannot wrap. */
   const uint32_t vector_end = n - (n % 48);
   uint32_t i;

   line_results[0] = 255;
   line_results[1] = 255;
   line_results[2] = 255;

   for (i = 0; i < vector_end; i += 48) {
      /* De-interleave: val[k] holds the 16 bytes belonging to line k. */
      const uint8x16x3_t a_loaded = vld3q_u8(&(a[i]));
      line0_min = vminq_u8(a_loaded.val[0], line0_min);
      line1_min = vminq_u8(a_loaded.val[1], line1_min);
      line2_min = vminq_u8(a_loaded.val[2], line2_min);
   }

   /* Spill the per-lane minima and reduce each vector to a single byte. */
   vst1q_u8(line0_array, line0_min);
   vst1q_u8(line1_array, line1_min);
   vst1q_u8(line2_array, line2_min);

   for (uint32_t lane = 0; lane < 16; lane++) {
      line_results[0] = ar_min_u8(line_results[0], line0_array[lane]);
      line_results[1] = ar_min_u8(line_results[1], line1_array[lane]);
      line_results[2] = ar_min_u8(line_results[2], line2_array[lane]);
   }

   /* Scalar tail: because vector_end is a multiple of 3, byte i still
    * belongs to line (i % 3). */
   for (i = vector_end; i < n; i++) {
      const uint32_t line = i % 3;
      line_results[line] = ar_min_u8(line_results[line], a[i]);
   }
}
/* Combine the RGB palette and the separate alpha (tRNS) palette held in
 * png_ptr into one interleaved RGBA palette, written to
 * png_ptr->riffled_palette.  Only 8-bit rows are accepted; any other bit
 * depth is reported through png_error.
 */
void
png_riffle_palette_rgba(png_structrp png_ptr, png_row_infop row_info)
{
   png_const_colorp rgb_entries = png_ptr->palette;
   png_bytep rgba_out = png_ptr->riffled_palette;
   png_const_bytep alpha_entries = png_ptr->trans_alpha;
   int alpha_count = png_ptr->num_trans;
   int entry_count;
   int idx;
   uint8x16x4_t rgba;

   if (row_info->bit_depth != 8)
   {
      png_error(png_ptr, "bit_depth must be 8 for png_riffle_palette_rgba");
      return;
   }

   entry_count = 1 << row_info->bit_depth;

   /* Every entry starts out fully opaque; the colour lanes are filled in
    * from the RGB palette on each pass of the loop below.
    */
   rgba.val[3] = vdupq_n_u8(0xff);

   /* Sixteen palette entries per pass: de-interleave RGB, then store the
    * same colours back interleaved with the opaque alpha lane.
    */
   for (idx = 0; idx < entry_count; idx += 16)
   {
      uint8x16x3_t rgb = vld3q_u8((png_const_bytep)(rgb_entries + idx));

      rgba.val[0] = rgb.val[0];
      rgba.val[1] = rgb.val[1];
      rgba.val[2] = rgb.val[2];
      vst4q_u8(rgba_out + 4 * idx, rgba);
   }

   /* Overwrite the alpha byte of the first num_trans entries with the
    * values from the transparency table.
    */
   for (idx = 0; idx < alpha_count; idx++)
   {
      rgba_out[4 * idx + 3] = alpha_entries[idx];
   }
}
Пример #3
0
/* Exercises the vld3q_u8 intrinsic with a uint8x16x3_t destination.
 * NOTE(review): the load address is 0, so this looks like a compile-only
 * check that must not be executed -- confirm against the test harness.
 */
void test_vld3Qu8 (void)
{
  uint8x16x3_t loaded = vld3q_u8 (0);

  /* The value is deliberately discarded; only the call itself matters. */
  (void) loaded;
}
Пример #4
0
// NEON path: consume the input in 48-byte groups for as long as at least
// one full group remains.
for (; srclen >= 48; srclen -= 48)
{
	// Load 48 input bytes, split into three 16-byte lanes:
	uint8x16x3_t in = vld3q_u8((uint8_t *)c);

	// Reshuffle, then map the reshuffled bytes onto the Base64 alphabet:
	uint8x16x4_t encoded = enc_translate(enc_reshuffle(in));

	// Interleave the four lanes and write 64 output bytes:
	vst4q_u8((uint8_t *)o, encoded);

	// 3 * 16 bytes read, 4 * 16 bytes written:
	c += 48;
	o += 64;
	outl += 64;
}
Пример #5
0
// Overload of vld3q for u8 pointers: forwards directly to vld3q_u8.
inline uint8x16x3_t vld3q(const u8 *ptr)
{
    return vld3q_u8(ptr);
}