// Run 16 parallel hashes void sha2Hash(vecu16 pointers, int totalBlocks, vecu16 outHashes) { // Initial H values vecu16 A = __builtin_vp_makevectori(0x6A09E667); vecu16 B = __builtin_vp_makevectori(0xBB67AE85); vecu16 C = __builtin_vp_makevectori(0x3C6EF372); vecu16 D = __builtin_vp_makevectori(0xA54FF53A); vecu16 E = __builtin_vp_makevectori(0x510E527F); vecu16 F = __builtin_vp_makevectori(0x9B05688C); vecu16 G = __builtin_vp_makevectori(0x1F83D9AB); vecu16 H = __builtin_vp_makevectori(0x5BE0CD19); for (int i = 0; i < totalBlocks; i++) { vecu16 W[64]; for (int index = 0; index < 16; index++) { W[index] = __builtin_vp_gather_loadi(pointers); pointers += __builtin_vp_makevectori(4); } for (int index = 16; index < 64; index++) W[index] = SIG1(W[index - 2]) + W[index - 7] + SIG0(W[index - 15]) + W[index - 16]; for (int round = 0; round < 64; round++) { vecu16 temp1 = H + SIG1(E) + CH(E, F, G) + __builtin_vp_makevectori(K[round]) + W[round]; vecu16 temp2 = SIG0(A) + MA(A, B, C); H = G; G = F; F = E; E = D + temp1; D = C; C = B; B = A; A = temp1 + temp2; } } // doesn't add padding or length fields to end... __builtin_vp_scatter_storei(outHashes, A); __builtin_vp_scatter_storei(outHashes + __builtin_vp_makevectori(4), B); __builtin_vp_scatter_storei(outHashes + __builtin_vp_makevectori(8), C); __builtin_vp_scatter_storei(outHashes + __builtin_vp_makevectori(12), D); __builtin_vp_scatter_storei(outHashes + __builtin_vp_makevectori(16), E); __builtin_vp_scatter_storei(outHashes + __builtin_vp_makevectori(20), F); __builtin_vp_scatter_storei(outHashes + __builtin_vp_makevectori(24), G); __builtin_vp_scatter_storei(outHashes + __builtin_vp_makevectori(28), H); }
// Code-generation test for the gather-load intrinsic: returns the result of
// __builtin_vp_gather_loadi applied to the vector argument `ptr`.
//
// NOTE(review): the trailing comments on the lines below look like LLVM
// FileCheck directives that match the emitted function label and a
// `load_gath` instruction addressing through v0. They are presumably read by
// the test harness, so keep their text unchanged when editing this function.
veci16 test_gatherloadi(veci16 ptr) // CHECK: test_gatherloadi
{
    return __builtin_vp_gather_loadi(ptr); // CHECK: load_gath v{{[0-9]+}}, (v0)
}