/*
 * MixBytes: mixes the bytes of each of the first `columns` columns of the
 * state `x`, in place.
 *
 * Output row r of a column is the XOR of mul2, mul2, mul3, mul4, mul5,
 * mul3, mul5, mul7 applied to the input bytes at rows r, r+1, ..., r+7
 * (row indices taken modulo ROWS).  Every output byte is computed from the
 * column's pre-mix contents.
 *
 * NOTE(review): the eight fixed offsets suggest ROWS is 8 -- confirm
 * against the definition of ROWS.
 */
void MixBytes(u8 x[ROWS][COLS1024], int columns)
{
  int col, row;
  u8 in[ROWS];

  for (col = 0; col < columns; col++) {
    /* Snapshot the column so the writes below never feed later rows. */
    for (row = 0; row < ROWS; row++) {
      in[row] = x[row][col];
    }

    for (row = 0; row < ROWS; row++) {
      x[row][col] =
        mul2(in[(row + 0) % ROWS]) ^
        mul2(in[(row + 1) % ROWS]) ^
        mul3(in[(row + 2) % ROWS]) ^
        mul4(in[(row + 3) % ROWS]) ^
        mul5(in[(row + 4) % ROWS]) ^
        mul3(in[(row + 5) % ROWS]) ^
        mul5(in[(row + 6) % ROWS]) ^
        mul7(in[(row + 7) % ROWS]);
    }
  }
}
/* DFunc : OTR Core Decryption Function, with nonce encryption */ __m128i DFunc( const uint8 *nonce, uint32 nonce_len, #if(ADP==Seri) const __m128i TA, #endif const uint8 *ciphertext, uint32 ci_len, uint32 t_len, uint8 *plaintext) { uint32 i; uint32 ell = 0; //number of 2BLOCK-byte chunks, excl. last one uint32 last = 0; //number of bytes in the last chunks block Sum = _mm_setzero_si128(); block txt[PIPE], Ln[PIPE + 1]; uint32 rest_len = ci_len; __m128i *ptp = (__m128i*)plaintext; const __m128i *ctp = (__m128i*)ciphertext; ALIGN(16)uint8 tmp[BLOCK] = { 0 }; block *La; /* Encryption of nonce */ memcpy(&tmp[BLOCK - nonce_len], nonce, nonce_len); tmp[0] = (uint8)((t_len%BLOCK) << 4); tmp[BLOCK - nonce_len - 1] |= 0x01; Ln[0] = _mm_load_si128((__m128i*)tmp); AES_encrypt(Ln[0], &Ln[0], encrypt_key); #if (ADP==Seri) Ln[0] = _mm_xor_si128(Ln[0], TA); mul2(Ln[0], &Ln[0]); #endif while (rest_len > (DBLOCK*PIPE)){ /* first round*/ mul2_PIPE(Ln); txt[0] = _mm_xor_si128(Ln[0], ctp[0]); txt[0] = _mm_xor_si128(Ln[1], txt[0]); txt[1] = _mm_xor_si128(Ln[1], ctp[2]); txt[1] = _mm_xor_si128(Ln[2], txt[1]); txt[2] = _mm_xor_si128(Ln[2], ctp[4]); txt[2] = _mm_xor_si128(Ln[3], txt[2]); txt[3] = _mm_xor_si128(Ln[3], ctp[6]); txt[3] = _mm_xor_si128(Ln[4], txt[3]); #if (PIPE>=5) txt[4] = _mm_xor_si128(Ln[4], ctp[8]); txt[4] = _mm_xor_si128(Ln[5], txt[4]); #endif #if (PIPE>=6) txt[5] = _mm_xor_si128(Ln[5], ctp[10]); txt[5] = _mm_xor_si128(Ln[6], txt[5]); #endif #if (PIPE>=7) txt[6] = _mm_xor_si128(Ln[6], ctp[12]); txt[6] = _mm_xor_si128(Ln[7], txt[6]); #endif #if (PIPE==8) txt[7] = _mm_xor_si128(Ln[7], ctp[14]); txt[7] = _mm_xor_si128(Ln[8], txt[7]); #endif AES_ecb_encrypt_PIPE(txt, encrypt_key); /* second round*/ ptp[0] = _mm_xor_si128(txt[0], ctp[1]); txt[0] = _mm_xor_si128(Ln[0], ptp[0]); ptp[2] = _mm_xor_si128(txt[1], ctp[3]); txt[1] = _mm_xor_si128(Ln[1], ptp[2]); ptp[4] = _mm_xor_si128(txt[2], ctp[5]); txt[2] = _mm_xor_si128(Ln[2], ptp[4]); ptp[6] = _mm_xor_si128(txt[3], ctp[7]); 
txt[3] = _mm_xor_si128(Ln[3], ptp[6]); #if (PIPE>=5) ptp[8] = _mm_xor_si128(txt[4], ctp[9]); txt[4] = _mm_xor_si128(Ln[4], ptp[8]); #endif #if (PIPE>=6) ptp[10] = _mm_xor_si128(txt[5], ctp[11]); txt[5] = _mm_xor_si128(Ln[5], ptp[10]); #endif #if (PIPE>=7) ptp[12] = _mm_xor_si128(txt[6], ctp[13]); txt[6] = _mm_xor_si128(Ln[6], ptp[12]); #endif #if (PIPE==8) ptp[14] = _mm_xor_si128(txt[7], ctp[15]); txt[7] = _mm_xor_si128(Ln[7], ptp[14]); #endif AES_ecb_encrypt_PIPE(txt, encrypt_key); ptp[1] = _mm_xor_si128(txt[0], ctp[0]); Sum = _mm_xor_si128(Sum, ptp[1]); ptp[3] = _mm_xor_si128(txt[1], ctp[2]); Sum = _mm_xor_si128(Sum, ptp[3]); ptp[5] = _mm_xor_si128(txt[2], ctp[4]); Sum = _mm_xor_si128(Sum, ptp[5]); ptp[7] = _mm_xor_si128(txt[3], ctp[6]); Sum = _mm_xor_si128(Sum, ptp[7]); #if (PIPE>=5) ptp[9] = _mm_xor_si128(txt[4], ctp[8]); Sum = _mm_xor_si128(Sum, ptp[9]); #endif #if (PIPE>=6) ptp[11] = _mm_xor_si128(txt[5], ctp[10]); Sum = _mm_xor_si128(Sum, ptp[11]); #endif #if (PIPE>=7) ptp[13] = _mm_xor_si128(txt[6], ctp[12]); Sum = _mm_xor_si128(Sum, ptp[13]); #endif #if (PIPE==8) ptp[15] = _mm_xor_si128(txt[7], ctp[14]); Sum = _mm_xor_si128(Sum, ptp[15]); #endif Ln[0] = _mm_load_si128(&Ln[PIPE]); ptp += (2 * PIPE); ctp += (2 * PIPE); rest_len -= (DBLOCK*PIPE); } if (rest_len != 0){ last = rest_len % DBLOCK; if (last == 0) last = DBLOCK; ell = (rest_len - last) / DBLOCK; // plaintext length = 2BLOCK*ell + last (non-zero) } /* 2-round Feistel for the full chunks */ mul3(Ln[0], &Ln[1]); for (i = 0; i < (2 * ell); i += 2){ txt[0] = _mm_xor_si128(Ln[1], ctp[i]); AES_encrypt(txt[0], &txt[0], encrypt_key); ptp[i] = _mm_xor_si128(txt[0], ctp[i + 1]); txt[0] = _mm_xor_si128(Ln[0], ptp[i]); AES_encrypt(txt[0], &txt[0], encrypt_key); ptp[i + 1] = _mm_xor_si128(txt[0], ctp[i]); Sum = _mm_xor_si128(Sum, ptp[i + 1]); Ln[0] = _mm_xor_si128(Ln[0], Ln[1]); mul2(Ln[1], &Ln[1]); } ptp += (2 * ell); ctp += (2 * ell); /* Last chunk */ if (last <= BLOCK){ //odd block, including the case pl_len 
= 0 (no plaintext) AES_encrypt(Ln[0], &txt[0], encrypt_key); //txt[0] is Z xorp(last, &txt[0], (uint8*)&ctp[0], (uint8*)&ptp[0]); ozp(last, (uint8*)&ptp[0], &txt[0]); Sum = _mm_xor_si128(txt[0], Sum); La = &Ln[0]; } else{//even blocks, last > BLOCK always holds. 2-round Feistel with last swap ozp(last - BLOCK, (uint8*)&ctp[1], &txt[0]); Sum = _mm_xor_si128(Sum, txt[0]); txt[0] = _mm_xor_si128(Ln[1], txt[0]); AES_encrypt(txt[0], &txt[0], encrypt_key); ptp[0] = _mm_xor_si128(txt[0], ctp[0]); txt[0] = _mm_xor_si128(Ln[0], ptp[0]); AES_encrypt(txt[0], &txt[1], encrypt_key); //txt[1] is Z xorp(last - BLOCK, &txt[1], (uint8*)&ctp[1], (uint8*)&ptp[1]); Sum = _mm_xor_si128(Sum, txt[1]); La = &Ln[1]; } /* TE generation */ if (last == BLOCK || last == DBLOCK){//last = 16 or 32 mul7(*La, La); } else{ mul3twice(*La, La); } Sum = _mm_xor_si128(Sum, *La); //Sum = (3^2 or 7)L* xor Sum AES_encrypt(Sum, &Sum, encrypt_key); return Sum;//TE }//end of DFunc