/*
 * MixBytes reversibly mixes the bytes within a column.
 *
 * For each of the first `columns` columns of x, row r is replaced by
 *
 *   mul2(c[r])   ^ mul2(c[r+1]) ^ mul3(c[r+2]) ^ mul4(c[r+3]) ^
 *   mul5(c[r+4]) ^ mul3(c[r+5]) ^ mul5(c[r+6]) ^ mul7(c[r+7])
 *
 * where c[] is the column's content before the update and the row
 * indices wrap around modulo ROWS.
 */
void MixBytes(u8 x[ROWS][COLS1024], int columns) {
  u8 column[ROWS];
  int col, row;

  for (col = 0; col < columns; col++) {
    /* Snapshot the column so every output row is computed from the
       unmodified input bytes. */
    for (row = 0; row < ROWS; row++) {
      column[row] = x[row][col];
    }

    for (row = 0; row < ROWS; row++) {
      x[row][col] =
        mul2(column[(row + 0) % ROWS]) ^
        mul2(column[(row + 1) % ROWS]) ^
        mul3(column[(row + 2) % ROWS]) ^
        mul4(column[(row + 3) % ROWS]) ^
        mul5(column[(row + 4) % ROWS]) ^
        mul3(column[(row + 5) % ROWS]) ^
        mul5(column[(row + 6) % ROWS]) ^
        mul7(column[(row + 7) % ROWS]);
    }
  }
}
Exemple #2
0
/*
DFunc : OTR Core Decryption Function, with nonce encryption

Decrypts ci_len bytes of ciphertext into plaintext and returns the 128-bit
value TE, i.e. the AES encryption of the running checksum Sum combined with
a final mask (see "TE generation" at the end of the function).

Parameters:
  nonce, nonce_len : nonce bytes, copied right-aligned into a BLOCK-byte
                     buffer that is then AES-encrypted to form the first mask.
                     NOTE(review): nonce_len is not validated here; the code
                     writes tmp[BLOCK - nonce_len - 1], so it presumably
                     requires nonce_len < BLOCK - confirm callers enforce it.
  TA               : only present when ADP==Seri; XORed into the encrypted
                     nonce block before that block is doubled.
  ciphertext,
  ci_len           : input buffer and its length in bytes.
  t_len            : (t_len % BLOCK) << 4 is stored in byte 0 of the nonce
                     block; presumably the tag length in bytes - confirm.
  plaintext        : output buffer.

Uses encrypt_key, which is not declared in this function.

NOTE(review): ciphertext and plaintext are cast to __m128i* and dereferenced
directly, which presumably requires 16-byte-aligned buffers - confirm.
NOTE(review): in the chunk loops ptp[k] is stored before ctp[k] is read
again (e.g. ptp[0] is written, then ctp[0] is re-read for ptp[1]), so the
two buffers must not overlap.
*/
__m128i DFunc(
	const uint8 *nonce,
	uint32 nonce_len,
#if(ADP==Seri)
	const __m128i TA,
#endif
	const uint8 *ciphertext,
	uint32 ci_len,
	uint32 t_len,
	uint8 *plaintext)
{
	uint32 i;
	uint32 ell = 0; //number of 2BLOCK-byte chunks, excl. last one
	uint32 last = 0; //number of bytes in the last chunk (stays 0 when nothing is left)

	block Sum = _mm_setzero_si128(); //running XOR checksum that is turned into TE at the end
	block txt[PIPE], Ln[PIPE + 1]; //txt: AES working blocks; Ln: mask values (Ln[0] is the current base mask)
	uint32 rest_len = ci_len; //bytes not yet consumed
	__m128i *ptp = (__m128i*)plaintext; //output cursor, in 16-byte blocks
	const __m128i *ctp = (__m128i*)ciphertext; //input cursor, in 16-byte blocks
	ALIGN(16)uint8 tmp[BLOCK] = { 0 }; //zero-initialised buffer for the formatted nonce block
	block *La; //points at the mask used for TE generation (chosen in the last-chunk branch)

	/* Encryption of nonce */
	/* Block layout: nonce right-aligned, a 0x01 marker bit in the byte just
	   before it, and (t_len % BLOCK) << 4 stored in byte 0. */
	memcpy(&tmp[BLOCK - nonce_len], nonce, nonce_len);
	tmp[0] = (uint8)((t_len%BLOCK) << 4);
	tmp[BLOCK - nonce_len - 1] |= 0x01;
	Ln[0] = _mm_load_si128((__m128i*)tmp);
	AES_encrypt(Ln[0], &Ln[0], encrypt_key);

#if (ADP==Seri)
	/* Fold TA into the mask, then apply mul2 to the result. */
	Ln[0] = _mm_xor_si128(Ln[0], TA);
	mul2(Ln[0], &Ln[0]);
#endif
	/* Bulk path: PIPE two-block chunks (DBLOCK*PIPE bytes) per iteration.
	   The comparison is strict, so at least one byte is always left for the
	   tail handling below. */
	while (rest_len > (DBLOCK*PIPE)){
		/* first round*/
		/* presumably fills Ln[1..PIPE] with successive doublings of Ln[0] -
		   confirm against mul2_PIPE's definition */
		mul2_PIPE(Ln);
		/* Chunk k: txt[k] = Ln[k] ^ Ln[k+1] ^ (even ciphertext block ctp[2k]) */
		txt[0] = _mm_xor_si128(Ln[0], ctp[0]);
		txt[0] = _mm_xor_si128(Ln[1], txt[0]); 
		txt[1] = _mm_xor_si128(Ln[1], ctp[2]);
		txt[1] = _mm_xor_si128(Ln[2], txt[1]); 
		txt[2] = _mm_xor_si128(Ln[2], ctp[4]);
		txt[2] = _mm_xor_si128(Ln[3], txt[2]); 
		txt[3] = _mm_xor_si128(Ln[3], ctp[6]);
		txt[3] = _mm_xor_si128(Ln[4], txt[3]); 
#if (PIPE>=5)
		txt[4] = _mm_xor_si128(Ln[4], ctp[8]);
		txt[4] = _mm_xor_si128(Ln[5], txt[4]); 
#endif
#if (PIPE>=6)
		txt[5] = _mm_xor_si128(Ln[5], ctp[10]);
		txt[5] = _mm_xor_si128(Ln[6], txt[5]); 
#endif
#if (PIPE>=7)
		txt[6] = _mm_xor_si128(Ln[6], ctp[12]);
		txt[6] = _mm_xor_si128(Ln[7], txt[6]); 
#endif
#if (PIPE==8)
		txt[7] = _mm_xor_si128(Ln[7], ctp[14]);
		txt[7] = _mm_xor_si128(Ln[8], txt[7]); 
#endif
		AES_ecb_encrypt_PIPE(txt, encrypt_key);
		/* second round*/
		/* Chunk k: even plaintext block ptp[2k] = txt[k] ^ (odd ciphertext
		   block ctp[2k+1]); then txt[k] = Ln[k] ^ ptp[2k] for the next AES pass. */
		ptp[0] = _mm_xor_si128(txt[0], ctp[1]);
		txt[0] = _mm_xor_si128(Ln[0], ptp[0]);
		ptp[2] = _mm_xor_si128(txt[1], ctp[3]);
		txt[1] = _mm_xor_si128(Ln[1], ptp[2]);
		ptp[4] = _mm_xor_si128(txt[2], ctp[5]);
		txt[2] = _mm_xor_si128(Ln[2], ptp[4]);
		ptp[6] = _mm_xor_si128(txt[3], ctp[7]);
		txt[3] = _mm_xor_si128(Ln[3], ptp[6]);
#if (PIPE>=5)
		ptp[8] = _mm_xor_si128(txt[4], ctp[9]);
		txt[4] = _mm_xor_si128(Ln[4], ptp[8]);
#endif
#if (PIPE>=6)
		ptp[10] = _mm_xor_si128(txt[5], ctp[11]);
		txt[5] = _mm_xor_si128(Ln[5], ptp[10]);
#endif
#if (PIPE>=7)
		ptp[12] = _mm_xor_si128(txt[6], ctp[13]);
		txt[6] = _mm_xor_si128(Ln[6], ptp[12]);
#endif
#if (PIPE==8)
		ptp[14] = _mm_xor_si128(txt[7], ctp[15]);
		txt[7] = _mm_xor_si128(Ln[7], ptp[14]);
#endif
		AES_ecb_encrypt_PIPE(txt, encrypt_key);
		/* Chunk k: odd plaintext block ptp[2k+1] = txt[k] ^ ctp[2k]; only the
		   odd plaintext blocks are accumulated into Sum. */
		ptp[1] = _mm_xor_si128(txt[0], ctp[0]);
		Sum = _mm_xor_si128(Sum, ptp[1]);
		ptp[3] = _mm_xor_si128(txt[1], ctp[2]);
		Sum = _mm_xor_si128(Sum, ptp[3]);
		ptp[5] = _mm_xor_si128(txt[2], ctp[4]);
		Sum = _mm_xor_si128(Sum, ptp[5]);
		ptp[7] = _mm_xor_si128(txt[3], ctp[6]);
		Sum = _mm_xor_si128(Sum, ptp[7]);
#if (PIPE>=5)
		ptp[9] = _mm_xor_si128(txt[4], ctp[8]);
		Sum = _mm_xor_si128(Sum, ptp[9]);
#endif
#if (PIPE>=6)
		ptp[11] = _mm_xor_si128(txt[5], ctp[10]);
		Sum = _mm_xor_si128(Sum, ptp[11]);
#endif
#if (PIPE>=7)
		ptp[13] = _mm_xor_si128(txt[6], ctp[12]);
		Sum = _mm_xor_si128(Sum, ptp[13]);
#endif
#if (PIPE==8)
		ptp[15] = _mm_xor_si128(txt[7], ctp[14]);
		Sum = _mm_xor_si128(Sum, ptp[15]);
#endif
		/* The last mask of this batch becomes the base mask of the next one. */
		Ln[0] = _mm_load_si128(&Ln[PIPE]);
		ptp += (2 * PIPE);
		ctp += (2 * PIPE);
		rest_len -= (DBLOCK*PIPE);
	}

	/* Split the remainder into ell full 2-block chunks plus a final chunk of
	   1..DBLOCK bytes; with no input at all, ell and last both stay 0. */
	if (rest_len != 0){
		last = rest_len % DBLOCK;
		if (last == 0) last = DBLOCK;
		ell = (rest_len - last) / DBLOCK; // plaintext length = 2BLOCK*ell + last (non-zero)
	}

	/* 2-round Feistel for the full chunks */
	/* Ln[1] is derived from Ln[0] via mul3 and masks the first round;
	   Ln[0] masks the second round. */
	mul3(Ln[0], &Ln[1]);
	for (i = 0; i < (2 * ell); i += 2){
		txt[0] = _mm_xor_si128(Ln[1], ctp[i]);
		AES_encrypt(txt[0], &txt[0], encrypt_key);
		ptp[i] = _mm_xor_si128(txt[0], ctp[i + 1]);
		txt[0] = _mm_xor_si128(Ln[0], ptp[i]);
		AES_encrypt(txt[0], &txt[0], encrypt_key);
		ptp[i + 1] = _mm_xor_si128(txt[0], ctp[i]);
		Sum = _mm_xor_si128(Sum, ptp[i + 1]);
		/* Advance both masks for the next chunk: Ln[0] ^= Ln[1], then
		   Ln[1] is passed through mul2. */
		Ln[0] = _mm_xor_si128(Ln[0], Ln[1]);
		mul2(Ln[1], &Ln[1]);
	}
	ptp += (2 * ell);
	ctp += (2 * ell);
	/* Last chunk */
	/* xorp and ozp are defined elsewhere; presumably xorp XORs the given number
	   of bytes of its block argument with the source bytes into the destination,
	   and ozp loads the given number of bytes into a padded block - confirm
	   against their definitions. */
	if (last <= BLOCK){ 	//odd block, including the case pl_len = 0 (no plaintext)
		AES_encrypt(Ln[0], &txt[0], encrypt_key); //txt[0] is Z
		xorp(last, &txt[0], (uint8*)&ctp[0], (uint8*)&ptp[0]);
		ozp(last, (uint8*)&ptp[0], &txt[0]);
		Sum = _mm_xor_si128(txt[0], Sum);
		La = &Ln[0];
	}
	else{//even blocks, last > BLOCK always holds. 2-round Feistel with last swap
		ozp(last - BLOCK, (uint8*)&ctp[1], &txt[0]);
		Sum = _mm_xor_si128(Sum, txt[0]);
		txt[0] = _mm_xor_si128(Ln[1], txt[0]);
		AES_encrypt(txt[0], &txt[0], encrypt_key);
		ptp[0] = _mm_xor_si128(txt[0], ctp[0]);
		txt[0] = _mm_xor_si128(Ln[0], ptp[0]);
		AES_encrypt(txt[0], &txt[1], encrypt_key); //txt[1] is Z
		xorp(last - BLOCK, &txt[1], (uint8*)&ctp[1], (uint8*)&ptp[1]);
		Sum = _mm_xor_si128(Sum, txt[1]);
		La = &Ln[1];
	}
	/* TE generation */
	/* The final mask is updated in place: mul7 when the last chunk ends on a
	   block boundary, mul3twice otherwise. */
	if (last == BLOCK || last == DBLOCK){//last = 16 or 32
		mul7(*La, La);
	}
	else{
		mul3twice(*La, La);
	}
	Sum = _mm_xor_si128(Sum, *La);	//Sum = (3^2 or 7)L* xor Sum
	AES_encrypt(Sum, &Sum, encrypt_key);
	return Sum;//TE
}//end of DFunc