Beispiel #1
0
	// Load string:
	__m128i str = _mm_loadu_si128((__m128i *)c);

	// The input consists of six character sets in the Base64 alphabet,
	// which we need to map back to the 6-bit values they represent.
	// There are three ranges, two singles, and then there's the rest.
	//
	//  #  From       To        Add  Characters
	//  1  [43]       [62]      +19  +
	//  2  [47]       [63]      +16  /
	//  3  [48..57]   [52..61]   +4  0..9
	//  4  [65..90]   [0..25]   -65  A..Z
	//  5  [97..122]  [26..51]  -71  a..z
	// (6) Everything else => invalid input

	const __m128i set1 = CMPEQ(str, '+');
	const __m128i set2 = CMPEQ(str, '/');
	const __m128i set3 = RANGE(str, '0', '9');
	const __m128i set4 = RANGE(str, 'A', 'Z');
	const __m128i set5 = RANGE(str, 'a', 'z');

	__m128i delta = REPLACE(set1, 19);
	delta = _mm_or_si128(delta, REPLACE(set2,  16));
	delta = _mm_or_si128(delta, REPLACE(set3,   4));
	delta = _mm_or_si128(delta, REPLACE(set4, -65));
	delta = _mm_or_si128(delta, REPLACE(set5, -71));

	// Check for invalid input: if any of the delta values are zero,
	// fall back on bytewise code to do error checking and reporting:
	if (_mm_movemask_epi8(CMPEQ(delta, 0))) {
		break;
Beispiel #2
0
	// Load string:
	__m256i str = _mm256_loadu_si256((__m256i *)c);

	// The input consists of six character sets in the Base64 alphabet,
	// which we need to map back to the 6-bit values they represent.
	// There are three ranges, two singles, and then there's the rest.
	//
	//  #  From       To        Add  Characters
	//  1  [43]       [62]      +19  +
	//  2  [47]       [63]      +16  /
	//  3  [48..57]   [52..61]   +4  0..9
	//  4  [65..90]   [0..25]   -65  A..Z
	//  5  [97..122]  [26..51]  -71  a..z
	// (6) Everything else => invalid input

	const __m256i set1 = CMPEQ(str, '+');
	const __m256i set2 = CMPEQ(str, '/');
	const __m256i set3 = RANGE(str, '0', '9');
	const __m256i set4 = RANGE(str, 'A', 'Z');
	const __m256i set5 = RANGE(str, 'a', 'z');

	__m256i delta = REPLACE(set1, 19);
	delta = _mm256_or_si256(delta, REPLACE(set2,  16));
	delta = _mm256_or_si256(delta, REPLACE(set3,   4));
	delta = _mm256_or_si256(delta, REPLACE(set4, -65));
	delta = _mm256_or_si256(delta, REPLACE(set5, -71));

	// Check for invalid input: if any of the delta values are zero,
	// fall back on bytewise code to do error checking and reporting:
	if (_mm256_movemask_epi8(CMPEQ(delta, 0))) {
		break;
static inline uint8x16x4_t
enc_translate (uint8x16x4_t in)
{
	uint8x16x4_t mask1, mask2, mask3, mask4, out;

	// Translate values 0..63 to the Base64 alphabet. There are five sets:
	// #  From      To         Abs  Delta  Characters
	// 0  [0..25]   [65..90]   +65  +65    ABCDEFGHIJKLMNOPQRSTUVWXYZ
	// 1  [26..51]  [97..122]  +71   +6    abcdefghijklmnopqrstuvwxyz
	// 2  [52..61]  [48..57]    -4  -75    0123456789
	// 3  [62]      [43]       -19  -15    +
	// 4  [63]      [47]       -16   +3    /

	// Create cumulative masks for characters in sets [1,2,3,4], [2,3,4],
	// [3,4], and [4]:
	mask1.val[0] = CMPGT(in.val[0], 25);
	mask1.val[1] = CMPGT(in.val[1], 25);
	mask1.val[2] = CMPGT(in.val[2], 25);
	mask1.val[3] = CMPGT(in.val[3], 25);

	mask2.val[0] = CMPGT(in.val[0], 51);
	mask2.val[1] = CMPGT(in.val[1], 51);
	mask2.val[2] = CMPGT(in.val[2], 51);
	mask2.val[3] = CMPGT(in.val[3], 51);

	mask3.val[0] = CMPGT(in.val[0], 61);
	mask3.val[1] = CMPGT(in.val[1], 61);
	mask3.val[2] = CMPGT(in.val[2], 61);
	mask3.val[3] = CMPGT(in.val[3], 61);

	mask4.val[0] = CMPEQ(in.val[0], 63);
	mask4.val[1] = CMPEQ(in.val[1], 63);
	mask4.val[2] = CMPEQ(in.val[2], 63);
	mask4.val[3] = CMPEQ(in.val[3], 63);

	// All characters are at least in cumulative set 0, so add 'A':
	out.val[0] = vaddq_u8(in.val[0], vdupq_n_u8(65));
	out.val[1] = vaddq_u8(in.val[1], vdupq_n_u8(65));
	out.val[2] = vaddq_u8(in.val[2], vdupq_n_u8(65));
	out.val[3] = vaddq_u8(in.val[3], vdupq_n_u8(65));

	// For inputs which are also in any of the other cumulative sets,
	// add delta values against the previous set(s) to correct the shift:
	out.val[0] = vaddq_u8(out.val[0], REPLACE(mask1.val[0], 6));
	out.val[1] = vaddq_u8(out.val[1], REPLACE(mask1.val[1], 6));
	out.val[2] = vaddq_u8(out.val[2], REPLACE(mask1.val[2], 6));
	out.val[3] = vaddq_u8(out.val[3], REPLACE(mask1.val[3], 6));

	out.val[0] = vsubq_u8(out.val[0], REPLACE(mask2.val[0], 75));
	out.val[1] = vsubq_u8(out.val[1], REPLACE(mask2.val[1], 75));
	out.val[2] = vsubq_u8(out.val[2], REPLACE(mask2.val[2], 75));
	out.val[3] = vsubq_u8(out.val[3], REPLACE(mask2.val[3], 75));

	out.val[0] = vsubq_u8(out.val[0], REPLACE(mask3.val[0], 15));
	out.val[1] = vsubq_u8(out.val[1], REPLACE(mask3.val[1], 15));
	out.val[2] = vsubq_u8(out.val[2], REPLACE(mask3.val[2], 15));
	out.val[3] = vsubq_u8(out.val[3], REPLACE(mask3.val[3], 15));

	out.val[0] = vaddq_u8(out.val[0], REPLACE(mask4.val[0], 3));
	out.val[1] = vaddq_u8(out.val[1], REPLACE(mask4.val[1], 3));
	out.val[2] = vaddq_u8(out.val[2], REPLACE(mask4.val[2], 3));
	out.val[3] = vaddq_u8(out.val[3], REPLACE(mask4.val[3], 3));

	return out;
}