/*
 * Demo driver: hex-dumps a 2 x 9 x 3 byte test pattern, feeds it to
 * bitslice(), then hex-dumps the first 8 * 2 * 3 bytes of the output buffer.
 */
int main(void)
{
    uint8_t pattern[2][9][3] = {
        {
            { 0x03, 0x03, 0x03 },
            { 0x00, 0x00, 0x00 },
            { 0x03, 0x03, 0x03 },
            { 0x00, 0x00, 0x00 },
            { 0x03, 0x03, 0x03 },
            { 0x00, 0x00, 0x00 },
            { 0x03, 0x03, 0x03 },
            { 0x00, 0x00, 0x00 },
            { 0x03, 0x03, 0x03 },
        },
        {
            { 0x00, 0x00, 0x00 },
            { 0x03, 0x03, 0x03 },
            { 0x01, 0x02, 0x04 },
            { 0xF2, 0xF4, 0xF8 },
            { 0x04, 0x08, 0x10 },
            { 0xF8, 0xF0, 0x22 },
            { 0x10, 0x20, 0x40 },
            { 0xF1, 0x4F, 0x8F },
            { 0x02, 0x80, 0x00 },
        },
    };
    uint8_t sliced[1024];

    /* Show the input exactly as laid out in memory. */
    hexdump(stdout, pattern, sizeof pattern);

    /* Same argument list as before: output, NULL, flat input view, 9, 2, 1. */
    bitslice(sliced, NULL, (const uint8_t *)pattern, 9, 2, 1);

    /* Only the leading 48 bytes of the output buffer are displayed. */
    hexdump(stdout, sliced, 8 * 2 * 3);

    return 0;
}
/*
 * Writes exactly len bytes of generated stream data to out.
 *
 * Each pass through enc_block derives eight int128 values from a local copy
 * of the nonce, pushes them through the bitslice / aesround (x9) / lastround
 * statement macros, and produces 128 bytes of output (eight int128 stores at
 * a 16-byte stride).
 *
 * Parameters:
 *   out   - destination buffer. Whole 128-byte chunks are written through
 *           int128 pointer casts; a shorter tail is copied byte by byte.
 *           NOTE(review): the int128 stores presumably require out to be
 *           suitably aligned/accessible as int128 - confirm against the
 *           int128 definition.
 *   len   - number of bytes to produce.
 *   nonce - source of one int128 that is copied to the stack. The 32-bit
 *           value at byte offset 12 of that copy is read/updated with
 *           LOAD32_BE/STORE32_BE and acts as the per-chunk counter.
 *   c     - forwarded untouched to aesround/lastround (presumably the
 *           expanded key material - TODO confirm against the macro bodies).
 *
 * Returns 0 on every path.
 *
 * Note: bitslice, aesround and lastround are invoked without a trailing ';',
 * so they must be statement macros defined elsewhere; do not reformat them
 * as ordinary calls.
 */
int crypto_stream_aes128ctr_afternm(unsigned char *out, unsigned long long len, const unsigned char *nonce, const unsigned char *c)
{
  /* Sixteen working registers: eight hold the state, eight are the
   * alternate set that the round macros ping-pong with. */
  int128 xmm0;
  int128 xmm1;
  int128 xmm2;
  int128 xmm3;
  int128 xmm4;
  int128 xmm5;
  int128 xmm6;
  int128 xmm7;
  int128 xmm8;
  int128 xmm9;
  int128 xmm10;
  int128 xmm11;
  int128 xmm12;
  int128 xmm13;
  int128 xmm14;
  int128 xmm15;
  int128 nonce_stack;            /* local, mutable copy of the nonce/counter */
  unsigned long long lensav;     /* byte count of the final partial chunk */
  unsigned char bl[128];         /* staging buffer for a partial chunk */
  unsigned char *blp;
  unsigned char *np;             /* byte view of nonce_stack */
  unsigned char b;
  uint32 tmp;

  /* Copy nonce on the stack */
  copy2(&nonce_stack, (const int128 *) (nonce + 0));
  np = (unsigned char *)&nonce_stack;

  /* Top of the per-chunk loop (re-entered via goto while len > 128). */
  enc_block:
  xmm0 = *(int128 *) (np + 0);
  copy2(&xmm1, &xmm0);
  shufb(&xmm1, SWAP32);
  /* xmm2..xmm7 start as copies of the SWAP32-shuffled value in xmm1. */
  copy2(&xmm2, &xmm1);
  copy2(&xmm3, &xmm1);
  copy2(&xmm4, &xmm1);
  copy2(&xmm5, &xmm1);
  copy2(&xmm6, &xmm1);
  copy2(&xmm7, &xmm1);
  /* Give copies 1..7 the offsets 1..7; xmm0 keeps the unmodified value
   * (presumably this bumps the 32-bit counter word - confirm in
   * add_uint32_big). */
  add_uint32_big(&xmm1, 1);
  add_uint32_big(&xmm2, 2);
  add_uint32_big(&xmm3, 3);
  add_uint32_big(&xmm4, 4);
  add_uint32_big(&xmm5, 5);
  add_uint32_big(&xmm6, 6);
  add_uint32_big(&xmm7, 7);
  /* xmm0 was never SWAP32-shuffled, so it takes M0; the others take M0SWAP. */
  shufb(&xmm0, M0);
  shufb(&xmm1, M0SWAP);
  shufb(&xmm2, M0SWAP);
  shufb(&xmm3, M0SWAP);
  shufb(&xmm4, M0SWAP);
  shufb(&xmm5, M0SWAP);
  shufb(&xmm6, M0SWAP);
  shufb(&xmm7, M0SWAP);
  /* Statement macro over xmm7..xmm0; xmm8 is passed as an extra operand
   * (presumably scratch - confirm in the macro). */
  bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, xmm8)
  /* Nine aesround invocations; the two register halves swap argument
   * positions on every round. */
  aesround( 1, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
  aesround( 2, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
  aesround( 3, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
  aesround( 4, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
  aesround( 5, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
  aesround( 6, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
  aesround( 7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
  aesround( 8, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
  aesround( 9, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
  lastround(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
  /* Second bitslice pass; its register order, read right to left
   * (xmm8, xmm9, xmm12, xmm14, xmm11, xmm15, xmm10, xmm13), is the order
   * in which the results are stored below. */
  bitslice(xmm13, xmm10, xmm15, xmm11, xmm14, xmm12, xmm9, xmm8, xmm0)

  /* Fewer than 128 bytes left (including len == 0): stage and copy a tail.
   * Exactly 128 left: store once and finish. Otherwise store and loop. */
  if(len < 128) goto partial;
  if(len == 128) goto full;

  /* Advance the 32-bit counter at offset 12 by the eight values consumed. */
  tmp = LOAD32_BE(np + 12);
  tmp += 8;
  STORE32_BE(np + 12, tmp);

  *(int128 *) (out + 0) = xmm8;
  *(int128 *) (out + 16) = xmm9;
  *(int128 *) (out + 32) = xmm12;
  *(int128 *) (out + 48) = xmm14;
  *(int128 *) (out + 64) = xmm11;
  *(int128 *) (out + 80) = xmm15;
  *(int128 *) (out + 96) = xmm10;
  *(int128 *) (out + 112) = xmm13;

  len -= 128;
  out += 128;
  goto enc_block;

  partial:
  lensav = len;
  /* len becomes the number of whole 16-byte units in the tail. */
  len >>= 4;

  /* np points at the local nonce copy and the function returns after this
   * path, so this counter update is not visible to the caller. */
  tmp = LOAD32_BE(np + 12);
  tmp += len;
  STORE32_BE(np + 12, tmp);

  /* Stage the full 128 bytes locally, then copy only lensav of them. */
  blp = bl;
  *(int128 *)(blp + 0) = xmm8;
  *(int128 *)(blp + 16) = xmm9;
  *(int128 *)(blp + 32) = xmm12;
  *(int128 *)(blp + 48) = xmm14;
  *(int128 *)(blp + 64) = xmm11;
  *(int128 *)(blp + 80) = xmm15;
  *(int128 *)(blp + 96) = xmm10;
  *(int128 *)(blp + 112) = xmm13;

  /* Byte-wise copy loop: one byte per iteration until lensav reaches 0. */
  bytes:
  if(lensav == 0) goto end;
  b = blp[0]; /* clang false positive */
  *(unsigned char *)(out + 0) = b;
  blp += 1;
  out +=1;
  lensav -= 1;
  goto bytes;

  /* Exactly one full chunk remained: same counter bump and stores as the
   * looping path, but without advancing out/len or looping again. */
  full:
  tmp = LOAD32_BE(np + 12);
  tmp += 8;
  STORE32_BE(np + 12, tmp);

  *(int128 *) (out + 0) = xmm8;
  *(int128 *) (out + 16) = xmm9;
  *(int128 *) (out + 32) = xmm12;
  *(int128 *) (out + 48) = xmm14;
  *(int128 *) (out + 64) = xmm11;
  *(int128 *) (out + 80) = xmm15;
  *(int128 *) (out + 96) = xmm10;
  *(int128 *) (out + 112) = xmm13;

  end:
  return 0;
}
void cipher_time() { // runs of 100000 to get interquartile range, median and mean uint32_t state[64] = {0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff}; uint32_t key_0[64] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; uint32_t key_1[64] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; uint32_t RC[12][64]; int bit; bitslice(RC[0],0x0000000000000000, 64); bitslice(RC[1],0x13198a2e03707344, 64); bitslice(RC[2],0xa4093822299f31d0, 64); bitslice(RC[3],0x082efa98ec4e6c89, 64); bitslice(RC[4],0x452821e638d01377, 64); bitslice(RC[5],0xbe5466cf34e90c6c, 64); bitslice(RC[6],0x7ef84f78fd955cb1, 64); bitslice(RC[7],0x85840851f1ac43aa, 64); bitslice(RC[8],0xc882d32f25323c54, 64); bitslice(RC[9],0x64a51195e0e3610d, 64); bitslice(RC[10],0xd3b5a399ca0c2399, 64); bitslice(RC[11],0xc0ac29b7c97c50dd, 64); clock_t start, end; mach_timebase_info_data_t info; mach_timebase_info(&info); for(int run = 0; run < 100000; run++) { start = mach_absolute_time(); enc(RC, state, key_0, key_1); end = mach_absolute_time(); printf("%lu\n",(end - start)); for(bit = 0; bit < 64; bit++) { state[bit] = 0xffffffff; key_0[bit] = 0; key_1[bit] = 0; } } }
void lastRounds_time() { // runs of 100000 to get mean for last rounds function uint32_t state[64] = {0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff}; uint32_t key_0[64] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; uint32_t key_1[64] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; uint32_t RC[12][64]; int bit; bitslice(RC[0],0x0000000000000000, 64); bitslice(RC[1],0x13198a2e03707344, 64); bitslice(RC[2],0xa4093822299f31d0, 64); bitslice(RC[3],0x082efa98ec4e6c89, 64); bitslice(RC[4],0x452821e638d01377, 64); bitslice(RC[5],0xbe5466cf34e90c6c, 64); bitslice(RC[6],0x7ef84f78fd955cb1, 64); bitslice(RC[7],0x85840851f1ac43aa, 64); bitslice(RC[8],0xc882d32f25323c54, 64); bitslice(RC[9],0x64a51195e0e3610d, 64); bitslice(RC[10],0xd3b5a399ca0c2399, 64); bitslice(RC[11],0xc0ac29b7c97c50dd, 64); clock_t start, end, result = 0; mach_timebase_info_data_t info; mach_timebase_info(&info); for(int run = 0; run < 100000; run++) { start = mach_absolute_time(); last_rounds(state, key_0, RC); end = mach_absolute_time(); result += (end - start); for(bit = 0; bit < 64; bit++) { state[bit] = 0xffffffff; key_0[bit] = 0; key_1[bit] = 0; } } printf("last_rounds time: %lu\n", 
(result / 100000) * info.numer / info.denom); }
void cipher_constant_time() { // 1000 runs to investigate individual run times uint32_t state[64] = {0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff}; uint32_t key_0[64] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; uint32_t key_1[64] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; uint32_t RC[12][64]; int bit; bitslice(RC[0],0x0000000000000000, 64); bitslice(RC[1],0x13198a2e03707344, 64); bitslice(RC[2],0xa4093822299f31d0, 64); bitslice(RC[3],0x082efa98ec4e6c89, 64); bitslice(RC[4],0x452821e638d01377, 64); bitslice(RC[5],0xbe5466cf34e90c6c, 64); bitslice(RC[6],0x7ef84f78fd955cb1, 64); bitslice(RC[7],0x85840851f1ac43aa, 64); bitslice(RC[8],0xc882d32f25323c54, 64); bitslice(RC[9],0x64a51195e0e3610d, 64); bitslice(RC[10],0xd3b5a399ca0c2399, 64); bitslice(RC[11],0xc0ac29b7c97c50dd, 64); clock_t start, end, curr_time = 0; mach_timebase_info_data_t info; mach_timebase_info(&info); for(int run = 0; run < 1000; run++) { start = mach_absolute_time(); enc(RC, state, key_0, key_1); end = mach_absolute_time(); curr_time = (end - start); for(bit = 0; bit < 64; bit++) { state[bit] = 0xffffffff; key_0[bit] = 0; key_1[bit] = 0; } } for(int run = 0; run < 1000; run++) 
{ start = mach_absolute_time(); enc(RC, state, key_0, key_1); end = mach_absolute_time(); curr_time = (end - start); for(bit = 0; bit < 64; bit++) { state[bit] = 0xffffffff; key_0[bit] = 0; key_1[bit] = 0; } printf("%d %lu a\n",run + 1, (curr_time * info.numer) / info.denom); } }
/*
 * Test vector 3:
 *   plaintext   0x0000000000000000
 *   k0          0xffffffffffffffff
 *   k1          0x0000000000000000
 *   ciphertext  0x9FB51935FC3DF524
 *
 * Every one of the 32 slices carries the same inputs, so after enc() each
 * slice of state must reproduce the expected ciphertext bits.
 */
void prince_test_3(){
    /* Values handed to bitslice(), one per RC row, in row order. */
    static const uint64_t rc_words[12] = {
        0x0000000000000000, 0x13198a2e03707344, 0xa4093822299f31d0,
        0x082efa98ec4e6c89, 0x452821e638d01377, 0xbe5466cf34e90c6c,
        0x7ef84f78fd955cb1, 0x85840851f1ac43aa, 0xc882d32f25323c54,
        0x64a51195e0e3610d, 0xd3b5a399ca0c2399, 0xc0ac29b7c97c50dd,
    };
    const uint64_t expected_cipher = 0x9FB51935FC3DF524;
    uint32_t exp_res[64];
    uint32_t state[64];
    uint32_t key_0[64];
    uint32_t key_1[64];
    uint32_t RC[12][64];
    int bit;
    int slice;

    for (bit = 0; bit < 64; bit++) {
        /* exp_res[i] is bit i of the expected ciphertext, least significant first. */
        exp_res[bit] = (uint32_t)((expected_cipher >> bit) & 1u);
        state[bit] = 0;            /* plaintext: all zero in every slice */
        key_0[bit] = 0xffffffff;   /* k0: all ones in every slice */
        key_1[bit] = 0;            /* k1: all zero in every slice */
    }

    for (int row = 0; row < 12; row++) {
        bitslice(RC[row], rc_words[row], 64);
    }

    enc(RC, state, key_0, key_1);

    /* Slice-major order; one check per (slice, bit) pair. */
    for (slice = 0; slice < 32; slice++) {
        for (bit = 0; bit < 64; bit++) {
            sput_fail_unless(((state[bit] >> slice) & 0x1) == exp_res[bit],"Prince Test 3");
        }
    }
}