Example #1
0
/*
 * Base64-encode `n` bytes from `in` into `out` using AVX2, 48 input bytes
 * (64 output characters) per iteration; the remainder is handled by
 * _base64_encode_tail.  Returns whatever _base64_encode_tail returns
 * (presumably the end of the output — confirm against its definition).
 *
 * `options` selects the URL-safe alphabet (Base64UseUrlAlphabet) and/or
 * a '\n' after every 64 output characters (Base64InsertLineBreaks).
 */
char *_base64_encode_avx2(char *out, const unsigned char *in, size_t n, int options)
{
    size_t i;
    size_t o = 0;

    const char (*alphabet)[2] = _base64_alphabet_precombined;
    if (options & Base64UseUrlAlphabet)
        alphabet = _base64url_alphabet_precombined;

    for (i = 0; n - i >= 48; i += 48) {
        // Read 48 bytes and duplicate each 16-byte chunk in the high part of
        // the register.  Use explicit unaligned loads: `in` carries no
        // alignment guarantee, and dereferencing a casted __m128i* is an
        // aligned access (undefined behavior / possible fault when the
        // pointer is not 16-byte aligned).
        __m256i chunk1 = _mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i *)&in[i + 0]));
        __m256i chunk2 = _mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i *)&in[i + 16]));
        __m256i chunk3 = _mm256_broadcastsi128_si256(_mm_loadu_si128((const __m128i *)&in[i + 32]));

        // first chunk of 12 bytes
        do_encode_12bytes(alphabet, out + o, chunk1);
        o += 16;

        // second chunk: 4 bytes left in chunk1, next 8 taken from chunk2
        do_encode_12bytes(alphabet, out + o, _mm256_alignr_epi8(chunk2, chunk1, 12));
        o += 16;

        // third chunk: 8 bytes left in chunk2, next 4 taken from chunk3
        do_encode_12bytes(alphabet, out + o, _mm256_alignr_epi8(chunk3, chunk2, 8));
        o += 16;

        // fourth chunk: 12 final bytes in chunk3
        do_encode_12bytes(alphabet, out + o, _mm256_srli_si256(chunk3, 4));
        o += 16;

        if (options & Base64InsertLineBreaks)
            out[o++] = '\n';
    }

    // Encode the remaining (< 48) bytes with the scalar tail routine.
    return _base64_encode_tail(out, o, in, n, options);
}
Example #2
0
void
test8bit (void)
{
    /* Diagnostic test: every intrinsic below takes an 8-bit immediate as
       its last argument.  Passing 256 (out of the 0..255 range) must make
       the compiler emit the error named in each dg-error directive.  The
       dg-error comments are DejaGnu test directives and must not change.  */
    l1 = _mm256_mpsadbw_epu8 (l2, l3, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
    l1 = _mm256_alignr_epi8 (l2, l3, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
    i1 = _mm_blend_epi32 (i1, i1, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
    l1 = _mm256_blend_epi32 (l2, l3, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
    l1 = _mm256_blend_epi16(l2, l3, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
    l1 = _mm256_permute2x128_si256 (l2, l3, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
    e1 = _mm256_permute4x64_pd (e2, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
    l1 = _mm256_permute4x64_epi64 (l2, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
    l1 = _mm256_shuffle_epi32 (l2, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
    l1 = _mm256_shufflehi_epi16 (l2, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
    l1 = _mm256_shufflelo_epi16 (l2, 256);  /* { dg-error "the last argument must be an 8-bit immediate" } */
    l1 = _mm256_slli_si256 (l2, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
    l1 = _mm256_srli_si256 (l2, 256); /* { dg-error "the last argument must be an 8-bit immediate" } */
}
Example #3
0
__m256i test2_mm256_alignr_epi8(__m256i a, __m256i b) {
  // A shift count of 17 exceeds the 16-byte lane width, so only 17-16 = 1
  // byte of `a` contributes per lane; the compiler lowers the whole
  // intrinsic to a plain byte right-shift of `a` by 1 byte = 8 bits,
  // which the CHECK line below pins down.
  // CHECK: @llvm.x86.avx2.psrl.dq({{.*}}, i32 8)
  return _mm256_alignr_epi8(a, b, 17);
}
Example #4
0
__m256i test_mm256_alignr_epi8(__m256i a, __m256i b) {
  // Shift count 2 (< 16): each 16-byte lane is filled with bytes 2..15 of
  // `b`'s lane followed by bytes 0..1 of `a`'s lane, which the expected
  // shufflevector mask below encodes (indices >= 32 select from `a`).
  // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> %{{.*}}, <32 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49>
  return _mm256_alignr_epi8(a, b, 2);
}
__m256i test2_mm256_alignr_epi8(__m256i a, __m256i b) {
  // Shift count 17 (> 16): `b` is exhausted, so the second shuffle operand
  // becomes zeroinitializer and the result is `a` shifted right by
  // 17-16 = 1 byte per lane, with zeros shifted in (indices >= 32 below
  // select the zero vector).
  // CHECK-LABEL: test2_mm256_alignr_epi8
  // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
  return _mm256_alignr_epi8(a, b, 17);
}
Example #6
0
/* Test the 256-bit form.
   _mm256_alignr_epi8 requires its shift count to be a compile-time
   immediate, so a runtime `imm` cannot be passed through directly: every
   count 0..32 needs its own literal call site.  A case-generating macro
   keeps the 32-way dispatch readable while preserving one literal
   immediate per case.  */
static void
avx2_test_palignr256 (__m256i t1, __m256i t2, unsigned int imm, __m256i * r)
{
#define PALIGNR256_CASE(N)                      \
  case N:                                       \
    *r = _mm256_alignr_epi8 (t1, t2, N);        \
    break;

  switch (imm)
    {
    PALIGNR256_CASE (0)
    PALIGNR256_CASE (1)
    PALIGNR256_CASE (2)
    PALIGNR256_CASE (3)
    PALIGNR256_CASE (4)
    PALIGNR256_CASE (5)
    PALIGNR256_CASE (6)
    PALIGNR256_CASE (7)
    PALIGNR256_CASE (8)
    PALIGNR256_CASE (9)
    PALIGNR256_CASE (10)
    PALIGNR256_CASE (11)
    PALIGNR256_CASE (12)
    PALIGNR256_CASE (13)
    PALIGNR256_CASE (14)
    PALIGNR256_CASE (15)
    PALIGNR256_CASE (16)
    PALIGNR256_CASE (17)
    PALIGNR256_CASE (18)
    PALIGNR256_CASE (19)
    PALIGNR256_CASE (20)
    PALIGNR256_CASE (21)
    PALIGNR256_CASE (22)
    PALIGNR256_CASE (23)
    PALIGNR256_CASE (24)
    PALIGNR256_CASE (25)
    PALIGNR256_CASE (26)
    PALIGNR256_CASE (27)
    PALIGNR256_CASE (28)
    PALIGNR256_CASE (29)
    PALIGNR256_CASE (30)
    PALIGNR256_CASE (31)
    default:
      /* Any count >= 32 shifts everything out; use 32 as the canonical
         literal, matching the original's default arm.  */
      *r = _mm256_alignr_epi8 (t1, t2, 32);
      break;
    }
#undef PALIGNR256_CASE
}