/*
 * s64_3: XOR of four transforms of x:
 *   shl(x, 2), shr(x, 2), rotl64(x, 28), and rotl64 applied once more
 *   (by 31) to the already-rotated value.
 * The original author's note gives the closed form as
 *   shr(x, 2) ^ shl(x, 2) ^ rotl64(x, 28) ^ rotl64(x, 59)
 * (28 + 31 = 59, assuming rotl64 is a plain 64-bit left rotate).
 */
static __inline u_int64_t s64_3( u_int64_t x)
{
	const u_int64_t shifted_up   = shl(x, 2);
	const u_int64_t shifted_down = shr(x, 2);
	const u_int64_t rot_first    = rotl64(x, 28);
	/* reuse the first rotation so the second one is a shorter rotate */
	const u_int64_t rot_second   = rotl64(rot_first, 31);

	return shifted_up ^ rot_first ^ shifted_down ^ rot_second;
}
/*
 * s64_2: XOR of four transforms of x:
 *   shl(x, 1), shr(x, 2), rotl64(x, 19), and rotl64 applied once more
 *   (by 34) to the already-rotated value.
 * The original author's note gives the closed form as
 *   shr(x, 2) ^ shl(x, 1) ^ rotl64(x, 19) ^ rotl64(x, 53)
 * (19 + 34 = 53, assuming rotl64 is a plain 64-bit left rotate).
 */
static __inline u_int64_t s64_2( u_int64_t x)
{
	const u_int64_t shifted_up   = shl(x, 1);
	const u_int64_t shifted_down = shr(x, 2);
	const u_int64_t rot_first    = rotl64(x, 19);
	/* reuse the first rotation so the second one is a shorter rotate */
	const u_int64_t rot_second   = rotl64(rot_first, 34);

	return shifted_up ^ rot_first ^ shifted_down ^ rot_second;
}
示例#3
0
			void finish()
			{
				uint8_t const * tail = &B[0];

				uint64_t k1 = 0;
				uint64_t k2 = 0;

				size_t const len = f + numblocks*sizeof(B)/sizeof(B[0]);

				uint64_t const c1 = 0x87c37b91114253d5LLU;
				uint64_t const c2 = 0x4cf5ad432745937fLLU;

				switch(len & 15)
				{
					case 15: k2 ^= ((uint64_t)tail[14]) << 48;
					case 14: k2 ^= ((uint64_t)tail[13]) << 40;
					case 13: k2 ^= ((uint64_t)tail[12]) << 32;
					case 12: k2 ^= ((uint64_t)tail[11]) << 24;
					case 11: k2 ^= ((uint64_t)tail[10]) << 16;
					case 10: k2 ^= ((uint64_t)tail[ 9]) << 8;
					case  9: k2 ^= ((uint64_t)tail[ 8]) << 0;
						k2 *= c2; k2  = rotl64(k2,33); k2 *= c1; h2 ^= k2;
					case  8: k1 ^= ((uint64_t)tail[ 7]) << 56;
					case  7: k1 ^= ((uint64_t)tail[ 6]) << 48;
					case  6: k1 ^= ((uint64_t)tail[ 5]) << 40;
					case  5: k1 ^= ((uint64_t)tail[ 4]) << 32;
					case  4: k1 ^= ((uint64_t)tail[ 3]) << 24;
					case  3: k1 ^= ((uint64_t)tail[ 2]) << 16;
					case  2: k1 ^= ((uint64_t)tail[ 1]) << 8;
					case  1: k1 ^= ((uint64_t)tail[ 0]) << 0;
					k1 *= c1; k1  = rotl64(k1,31); k1 *= c2; h1 ^= k1;
				};

				//----------
				// finalization

				h1 ^= len; h2 ^= len;

				h1 += h2;
				h2 += h1;

				h1 = fmix64(h1);
				h2 = fmix64(h2);

				h1 += h2;
				h2 += h1;
			}
/*
 * s64_1: XOR of four transforms of x:
 *   shl(x, 2), shr(x, 1), rotl64(x, 43), and that rotated value turned
 *   back to the right by 30 with rotr64.
 * The original author's note gives the closed form as
 *   shr(x, 1) ^ shl(x, 2) ^ rotl64(x, 13) ^ rotl64(x, 43)
 * (43 - 30 = 13, assuming rotl64/rotr64 are plain 64-bit rotates).
 */
static __inline u_int64_t s64_1( u_int64_t x)
{
	const u_int64_t shifted_up   = shl(x, 2);
	const u_int64_t shifted_down = shr(x, 1);
	const u_int64_t rot_first    = rotl64(x, 43);
	/* derive the second rotation from the first instead of from x */
	const u_int64_t rot_second   = rotr64(rot_first, 30);

	return shifted_up ^ rot_first ^ shifted_down ^ rot_second;
}
示例#5
0
/*
 * Buffer update step: moves every word of the 16-word buffer b one slot
 * towards the higher index (b[15] wraps around), then mixes three slots:
 *   b[0]  = old b[15] ^ *a
 *   b[4]  ^= b[8]                 (both already shifted)
 *   b[10] ^= rotl64(b[14], 32)    (both already shifted)
 *
 * b: buffer of at least 16 words, updated in place.
 * a: pointer to the single word XORed into b[0]; only read.
 */
static
void mugi_lambda(uint64_t *b, uint64_t *a){
	const uint64_t wrapped = b[15];
	uint8_t slot = 15;
	while(slot > 0){
		b[slot] = b[slot-1];
		slot--;
	}
	b[0]  = wrapped ^ a[0];
	b[4]  = b[4] ^ b[8];
	b[10] = b[10] ^ rotl64(b[14], 32);
}
示例#6
0
/*
 * Initialise a MUGI context from a key and an initialisation vector.
 *
 * key: 16 bytes (128 bits) are copied from here into ctx->a[0..1].
 * iv:  16 bytes are read from here as two native-endian 64-bit words.
 * ctx: context whose state words a[0..2] and buffer b[0..15] are set up.
 *
 * Neither key nor iv needs any particular alignment: both are accessed
 * with memcpy rather than through a casted uint64_t pointer, which would
 * be undefined behaviour for a misaligned buffer and would also discard
 * the const qualifier.
 */
void mugi_init(const void *key, const void *iv, mugi_ctx_t *ctx){
	uint8_t i;
	uint64_t a0;
	uint64_t iv_words[2];

	/* load the key into the state and derive the third state word */
	memcpy(ctx->a, key, 128/8);
	ctx->a[2] = rotl64(ctx->a[0], 7) ^ rotr64(ctx->a[1], 7) ^ C0;

	/* fill the buffer back to front with a[0] after each init round */
	for(i=0; i<16;i++){
		mugi_rho_init(ctx->a);
		ctx->b[15-i] = ctx->a[0];
	}

	/* mix the IV into the state, mirroring the key-derived third word */
	memcpy(iv_words, iv, sizeof iv_words);
	ctx->a[0] ^= iv_words[0];
	ctx->a[1] ^= iv_words[1];
	ctx->a[2] ^= rotl64(iv_words[0], 7) ^ rotr64(iv_words[1], 7) ^ C0;
	for(i=0; i<16;i++){
		mugi_rho_init(ctx->a);
	}

	/* 15 full update rounds; lambda must see a[0] from before rho ran */
	for(i=0; i<15;i++){
		a0 = ctx->a[0];
		mugi_rho(ctx);
		mugi_lambda(ctx->b, &a0);
	}
	/* NOTE(review): intended to scrub the state copy, but as a plain dead
	 * store the compiler is free to drop it. */
	a0=0x00;
}
示例#7
0
/*
 * State update step on the three state words ctx->a[0..2]:
 *   new a[0] = old a[1]
 *   new a[1] = old a[2] ^ F(new a[0], b[4])              ^ C1
 *   new a[2] = old a[0] ^ F(new a[0], rotl64(b[10], 17)) ^ C2
 * where F is whatever mugi_f writes through its first argument.
 * The buffer ctx->b is only read.
 */
static
void mugi_rho(mugi_ctx_t *ctx){
	uint64_t f_out;
	uint64_t b10_rot;
	const uint64_t prev_a0 = ctx->a[0];

	/* rotate the state words: (a0, a1, a2) <- (a1, a2, a0) */
	f_out = ctx->a[1];
	ctx->a[1] = ctx->a[2];
	ctx->a[2] = prev_a0;
	ctx->a[0] = f_out;

	mugi_f(&f_out, &(ctx->a[0]), &(ctx->b[4]));
	ctx->a[1] = ctx->a[1] ^ f_out ^ C1;

	b10_rot = rotl64(ctx->b[10], 17);
	mugi_f(&f_out, &(ctx->a[0]), &b10_rot);
	ctx->a[2] = ctx->a[2] ^ f_out ^ C2;
}
示例#8
0
/*
 * Known-answer test for the rotate helpers rotl8/rotr8, rotl16/rotr16,
 * rotl32/rotr32 and rotl64/rotr64.
 *
 * Each ASSERT compares one rotate call on a fixed input with a literal
 * expected value. Within each width the rotr table is the rotl table
 * read in reverse, i.e. rotating right by k is checked against the same
 * value as rotating left by (width - k).
 *
 * Returns 0 when control reaches the end; what ASSERT does on a failed
 * comparison is defined elsewhere (presumably it aborts -- confirm).
 */
int
main (void)
{
  /* 8-bit rotates of 42, including the identity cases 0 and 8.  */
  ASSERT (rotl8 (42, 0) == 42);
  ASSERT (rotl8 (42, 1) == 84);
  ASSERT (rotl8 (42, 2) == 168);
  ASSERT (rotl8 (42, 3) == 81);
  ASSERT (rotl8 (42, 4) == 162);
  ASSERT (rotl8 (42, 5) == 69);
  ASSERT (rotl8 (42, 6) == 138);
  ASSERT (rotl8 (42, 7) == 21);
  ASSERT (rotl8 (42, 8) == 42);

  ASSERT (rotr8 (42, 0) == 42);
  ASSERT (rotr8 (42, 1) == 21);
  ASSERT (rotr8 (42, 2) == 138);
  ASSERT (rotr8 (42, 3) == 69);
  ASSERT (rotr8 (42, 4) == 162);
  ASSERT (rotr8 (42, 5) == 81);
  ASSERT (rotr8 (42, 6) == 168);
  ASSERT (rotr8 (42, 7) == 84);
  ASSERT (rotr8 (42, 8) == 42);

  /* 16-bit rotates of 43981 (0xABCD), including the identity cases
     0 and 16.  */
  ASSERT (rotl16 (43981, 0) == 43981);
  ASSERT (rotl16 (43981, 1) == 22427);
  ASSERT (rotl16 (43981, 2) == 44854);
  ASSERT (rotl16 (43981, 3) == 24173);
  ASSERT (rotl16 (43981, 4) == 48346);
  ASSERT (rotl16 (43981, 5) == 31157);
  ASSERT (rotl16 (43981, 6) == 62314);
  ASSERT (rotl16 (43981, 7) == 59093);
  ASSERT (rotl16 (43981, 8) == 52651);
  ASSERT (rotl16 (43981, 9) == 39767);
  ASSERT (rotl16 (43981, 10) == 13999);
  ASSERT (rotl16 (43981, 11) == 27998);
  ASSERT (rotl16 (43981, 12) == 55996);
  ASSERT (rotl16 (43981, 13) == 46457);
  ASSERT (rotl16 (43981, 14) == 27379);
  ASSERT (rotl16 (43981, 15) == 54758);
  ASSERT (rotl16 (43981, 16) == 43981);

  ASSERT (rotr16 (43981, 0) == 43981);
  ASSERT (rotr16 (43981, 1) == 54758);
  ASSERT (rotr16 (43981, 2) == 27379);
  ASSERT (rotr16 (43981, 3) == 46457);
  ASSERT (rotr16 (43981, 4) == 55996);
  ASSERT (rotr16 (43981, 5) == 27998);
  ASSERT (rotr16 (43981, 6) == 13999);
  ASSERT (rotr16 (43981, 7) == 39767);
  ASSERT (rotr16 (43981, 8) == 52651);
  ASSERT (rotr16 (43981, 9) == 59093);
  ASSERT (rotr16 (43981, 10) == 62314);
  ASSERT (rotr16 (43981, 11) == 31157);
  ASSERT (rotr16 (43981, 12) == 48346);
  ASSERT (rotr16 (43981, 13) == 24173);
  ASSERT (rotr16 (43981, 14) == 44854);
  ASSERT (rotr16 (43981, 15) == 22427);
  ASSERT (rotr16 (43981, 16) == 43981);

  /* 32-bit rotates of 2309737967 (0x89ABCDEF) for counts 1..31 only;
     counts 0 and 32 are not exercised here.  */
  ASSERT (rotl32 (2309737967U, 1) == 324508639U);
  ASSERT (rotl32 (2309737967U, 2) == 649017278U);
  ASSERT (rotl32 (2309737967U, 3) == 1298034556U);
  ASSERT (rotl32 (2309737967U, 4) == 2596069112U);
  ASSERT (rotl32 (2309737967U, 5) == 897170929U);
  ASSERT (rotl32 (2309737967U, 6) == 1794341858U);
  ASSERT (rotl32 (2309737967U, 7) == 3588683716U);
  ASSERT (rotl32 (2309737967U, 8) == 2882400137U);
  ASSERT (rotl32 (2309737967U, 9) == 1469832979U);
  ASSERT (rotl32 (2309737967U, 10) == 2939665958U);
  ASSERT (rotl32 (2309737967U, 11) == 1584364621U);
  ASSERT (rotl32 (2309737967U, 12) == 3168729242U);
  ASSERT (rotl32 (2309737967U, 13) == 2042491189U);
  ASSERT (rotl32 (2309737967U, 14) == 4084982378U);
  ASSERT (rotl32 (2309737967U, 15) == 3874997461U);
  ASSERT (rotl32 (2309737967U, 16) == 3455027627U);
  ASSERT (rotl32 (2309737967U, 17) == 2615087959U);
  ASSERT (rotl32 (2309737967U, 18) == 935208623U);
  ASSERT (rotl32 (2309737967U, 19) == 1870417246U);
  ASSERT (rotl32 (2309737967U, 20) == 3740834492U);
  ASSERT (rotl32 (2309737967U, 21) == 3186701689U);
  ASSERT (rotl32 (2309737967U, 22) == 2078436083U);
  ASSERT (rotl32 (2309737967U, 23) == 4156872166U);
  ASSERT (rotl32 (2309737967U, 24) == 4018777037U);
  ASSERT (rotl32 (2309737967U, 25) == 3742586779U);
  ASSERT (rotl32 (2309737967U, 26) == 3190206263U);
  ASSERT (rotl32 (2309737967U, 27) == 2085445231U);
  ASSERT (rotl32 (2309737967U, 28) == 4170890462U);
  ASSERT (rotl32 (2309737967U, 29) == 4046813629U);
  ASSERT (rotl32 (2309737967U, 30) == 3798659963U);
  ASSERT (rotl32 (2309737967U, 31) == 3302352631U);

  ASSERT (rotr32 (2309737967U, 1) == 3302352631lU);
  ASSERT (rotr32 (2309737967U, 2) == 3798659963lU);
  ASSERT (rotr32 (2309737967U, 3) == 4046813629lU);
  ASSERT (rotr32 (2309737967U, 4) == 4170890462lU);
  ASSERT (rotr32 (2309737967U, 5) == 2085445231lU);
  ASSERT (rotr32 (2309737967U, 6) == 3190206263lU);
  ASSERT (rotr32 (2309737967U, 7) == 3742586779lU);
  ASSERT (rotr32 (2309737967U, 8) == 4018777037lU);
  ASSERT (rotr32 (2309737967U, 9) == 4156872166lU);
  ASSERT (rotr32 (2309737967U, 10) == 2078436083lU);
  ASSERT (rotr32 (2309737967U, 11) == 3186701689lU);
  ASSERT (rotr32 (2309737967U, 12) == 3740834492lU);
  ASSERT (rotr32 (2309737967U, 13) == 1870417246lU);
  ASSERT (rotr32 (2309737967U, 14) == 935208623lU);
  ASSERT (rotr32 (2309737967U, 15) == 2615087959lU);
  ASSERT (rotr32 (2309737967U, 16) == 3455027627lU);
  ASSERT (rotr32 (2309737967U, 17) == 3874997461lU);
  ASSERT (rotr32 (2309737967U, 18) == 4084982378lU);
  ASSERT (rotr32 (2309737967U, 19) == 2042491189lU);
  ASSERT (rotr32 (2309737967U, 20) == 3168729242lU);
  ASSERT (rotr32 (2309737967U, 21) == 1584364621lU);
  ASSERT (rotr32 (2309737967U, 22) == 2939665958lU);
  ASSERT (rotr32 (2309737967U, 23) == 1469832979lU);
  ASSERT (rotr32 (2309737967U, 24) == 2882400137lU);
  ASSERT (rotr32 (2309737967U, 25) == 3588683716lU);
  ASSERT (rotr32 (2309737967U, 26) == 1794341858lU);
  ASSERT (rotr32 (2309737967U, 27) == 897170929lU);
  ASSERT (rotr32 (2309737967U, 28) == 2596069112lU);
  ASSERT (rotr32 (2309737967U, 29) == 1298034556lU);
  ASSERT (rotr32 (2309737967U, 30) == 649017278lU);
  ASSERT (rotr32 (2309737967U, 31) == 324508639lU);

  /* 64-bit rotates of 16045690984503098046 (0xDEADBEEFCAFEBABE) for
     counts 1..63; compiled only when UINT64_MAX is defined.  */
#ifdef UINT64_MAX
  ASSERT (rotl64 (16045690984503098046ULL, 1) == 13644637895296644477ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 2) == 8842531716883737339ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 3) == 17685063433767474678ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 4) == 16923382793825397741ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 5) == 15400021513941243867ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 6) == 12353298954172936119ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 7) == 6259853834636320623ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 8) == 12519707669272641246ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 9) == 6592671264835730877ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 10) == 13185342529671461754ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 11) == 7923940985633371893ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 12) == 15847881971266743786ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 13) == 13249019868823935957ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 14) == 8051295663938320299ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 15) == 16102591327876640598ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 16) == 13758438582043729581ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 17) == 9070133090377907547ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 18) == 18140266180755815094ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 19) == 17833788287802078573ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 20) == 17220832501894605531ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 21) == 15994920930079659447ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 22) == 13543097786449767279ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 23) == 8639451499189982943ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 24) == 17278902998379965886ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 25) == 16111061923050380157ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 26) == 13775379772391208699ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 27) == 9104015471072865783ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 28) == 18208030942145731566ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 29) == 17969317810581911517ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 30) == 17491891547454271419ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 31) == 16537039021198991223ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 32) == 14627333968688430831ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 33) == 10807923863667310047ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 34) == 3169103653625068479ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 35) == 6338207307250136958ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 36) == 12676414614500273916ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 37) == 6906085155290996217ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 38) == 13812170310581992434ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 39) == 9177596547454433253ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 40) == 18355193094908866506ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 41) == 18263642116108181397ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 42) == 18080540158506811179ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 43) == 17714336243304070743ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 44) == 16981928412898589871ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 45) == 15517112752087628127ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 46) == 12587481430465704639ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 47) == 6728218787221857663ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 48) == 13456437574443715326ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 49) == 8466131075177879037ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 50) == 16932262150355758074ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 51) == 15417780227001964533ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 52) == 12388816380294377451ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 53) == 6330888686879203287ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 54) == 12661777373758406574ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 55) == 6876810673807261533ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 56) == 13753621347614523066ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 57) == 9060498621519494517ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 58) == 18120997243038989034ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 59) == 17795250412368426453ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 60) == 17143756751027301291ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 61) == 15840769428345050967ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 62) == 13234794782980550319ULL);
  ASSERT (rotl64 (16045690984503098046ULL, 63) == 8022845492251549023ULL);

  ASSERT (rotr64 (16045690984503098046ULL, 1) == 8022845492251549023ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 2) == 13234794782980550319ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 3) == 15840769428345050967ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 4) == 17143756751027301291ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 5) == 17795250412368426453ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 6) == 18120997243038989034ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 7) == 9060498621519494517ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 8) == 13753621347614523066ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 9) == 6876810673807261533ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 10) == 12661777373758406574ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 11) == 6330888686879203287ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 12) == 12388816380294377451ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 13) == 15417780227001964533ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 14) == 16932262150355758074ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 15) == 8466131075177879037ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 16) == 13456437574443715326ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 17) == 6728218787221857663ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 18) == 12587481430465704639ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 19) == 15517112752087628127ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 20) == 16981928412898589871ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 21) == 17714336243304070743ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 22) == 18080540158506811179ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 23) == 18263642116108181397ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 24) == 18355193094908866506ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 25) == 9177596547454433253ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 26) == 13812170310581992434ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 27) == 6906085155290996217ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 28) == 12676414614500273916ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 29) == 6338207307250136958ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 30) == 3169103653625068479ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 31) == 10807923863667310047ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 32) == 14627333968688430831ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 33) == 16537039021198991223ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 34) == 17491891547454271419ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 35) == 17969317810581911517ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 36) == 18208030942145731566ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 37) == 9104015471072865783ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 38) == 13775379772391208699ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 39) == 16111061923050380157ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 40) == 17278902998379965886ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 41) == 8639451499189982943ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 42) == 13543097786449767279ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 43) == 15994920930079659447ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 44) == 17220832501894605531ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 45) == 17833788287802078573ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 46) == 18140266180755815094ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 47) == 9070133090377907547ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 48) == 13758438582043729581ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 49) == 16102591327876640598ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 50) == 8051295663938320299ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 51) == 13249019868823935957ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 52) == 15847881971266743786ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 53) == 7923940985633371893ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 54) == 13185342529671461754ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 55) == 6592671264835730877ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 56) == 12519707669272641246ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 57) == 6259853834636320623ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 58) == 12353298954172936119ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 59) == 15400021513941243867ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 60) == 16923382793825397741ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 61) == 17685063433767474678ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 62) == 8842531716883737339ULL);
  ASSERT (rotr64 (16045690984503098046ULL, 63) == 13644637895296644477ULL);
#endif /* UINT64_MAX */

  return 0;
}
示例#9
0
// Decryption and Verification procedure
int crypto_aead_decrypt(
	unsigned char *m, unsigned long long *mlen,
	unsigned char *nsec,
	const unsigned char *c, unsigned long long clen,
	const unsigned char *ad, unsigned long long adlen,
	const unsigned char *npub,
	const unsigned char *k
	)
{
	//...
	//... the code for the cipher implementation goes here,
	//... generating a plaintext m[0],m[1],...,m[*mlen-1]
	//... and secret message number nsec[0],nsec[1],...
	//... from a ciphertext c[0],c[1],...,c[clen-1]
	//... and associated data ad[0],ad[1],...,ad[adlen-1]
	//... and public message number npub[0],npub[1],...
	//... and secret key k[0],k[1],...
	//...

	// some 64-bit temp variables
	u_int64_t  t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13;
	// more 64-bit temp variables
	u_int64_t  x0, x1, x2, x3, y0, y1, y2, y3, z0, z1, z2, z3;
	// more 64-bit temp variables
	u_int64_t  pom1, pom2, pom3, pom4, nu1, nu2, nu3, nu4;
	// more 64-bit temp variables
	u_int64_t IS0, IS1, IS2, IS3, IS4, IS5, IS6, IS7, IS8, IS9, IS10, IS11, IS12, IS13, IS14, IS15;
	// more 64-bit temp variables
	u_int64_t preCompIS0, preCompIS1, preCompIS2, preCompIS3, preCompIS4, preCompIS5, preCompIS6, preCompIS7, preCompIS8, preCompIS9, preCompIS10, preCompIS11, preCompIS12, preCompIS13, preCompIS14, preCompIS15;
	// pointers to 64-bit variables
	u_int64_t  *c64, *m64, *ad64, *nsec64, *npub64, *k64;
	// an array for storing some temporal values for the Tag computation
	u_int64_t  tempTag[CRYPTO_ABYTES / W] = { 0 };
	// counter ctr is a 64-bit variable in all variants of PiCipher
	u_int64_t  ctr = 0x0000000000000000ull;
	// an array for the Common Internal State
	u_int64_t  CIS[IS_SIZE] = { 0 };
	u_int64_t  CIS1[IS_SIZE] = { 0 };
	// pointers that look at the used data arrays as arrays of bytes
	u_int8_t   *InternalState8, *CommonInternalState8, *tempTag8;
	// variables for dealing with various lengths of the plaintext and associated data
	int LastMessageChunkLength, LastADChunkLength;
	// different iterator variables
	unsigned long long i, j, jj, ii, b, i1, j1, a, cblocks, ii1, ii2, b1;

	c64 = (u_int64_t *)c;
	m64 = (u_int64_t *)m;
	ad64 = (u_int64_t *)ad;
	nsec64 = (u_int64_t *)nsec;
	npub64 = (u_int64_t *)npub;
	k64 = (u_int64_t *)k;
	InternalState8 = (u_int8_t *)IS;
	CommonInternalState8 = (u_int8_t *)CIS;
	tempTag8 = (u_int8_t *)tempTag;

	// phase 1: Initialization
	/* for (i = 0; i < IS_SIZE; i++) {
	IS[i] = 0;
	} */
	IS[0] = 0;
	IS[1] = 0;
	IS[2] = 0;
	IS[3] = 0;
	IS[4] = 0;
	IS[5] = 0;
	IS[6] = 0;
	IS[7] = 0;
	IS[8] = 0;
	IS[9] = 0;
	IS[10] = 0;
	IS[11] = 0;
	IS[12] = 0;
	IS[13] = 0;
	IS[14] = 0;
	IS[15] = 0;

	// injection of the key
	/*for (i = 0; i < CRYPTO_KEYBYTES; i++) {
	InternalState8[i] = k[i];
	}*/
	IS[0] = k64[0];
	IS[1] = k64[1];
	IS[2] = k64[2];
	IS[3] = k64[3];

	// injection of the nonce (public message number - PMN)
	/*for (j = 0; j < CRYPTO_NPUBBYTES; j++) {
	InternalState8[i++] = npub[j];
	}*/
	IS[4] = npub64[0];
	IS[5] = npub64[1];
	// appending a single 1 to the concatenated value of the key and PMN
	InternalState8[48] = 0x01;

	// applying the permutation function pi
	pi(IS[0], IS[1], IS[2], IS[3], IS[4], IS[5], IS[6], IS[7], IS[8], IS[9], IS[10], IS[11], IS[12], IS[13], IS[14], IS[15]);

	// initialization of the Common Internal State (CIS), common for all parallel invocations of pi() with different ctrs
	/* for (i = 0; i < IS_SIZE; i++) {
	CIS[i] = IS[i];
	} */
	CIS[0] = IS[0];
	CIS[1] = IS[1];
	CIS[2] = IS[2];
	CIS[3] = IS[3];
	CIS[4] = IS[4];
	CIS[5] = IS[5];
	CIS[6] = IS[6];
	CIS[7] = IS[7];
	CIS[8] = IS[8];
	CIS[9] = IS[9];
	CIS[10] = IS[10];
	CIS[11] = IS[11];
	CIS[12] = IS[12];
	CIS[13] = IS[13];
	CIS[14] = IS[14];
	CIS[15] = IS[15];

	// initialization of the ctr obtained from the first 64 bits of the capacity of CIS
	ctr = CIS[4];

	// phase 2: Processing the associated data
	nu64(CIS[4], CIS[5], CIS[6], CIS[7], CIS1[4], CIS1[5], CIS1[6], CIS1[7]);
	nu64(CIS[8], CIS[9], CIS[10], CIS[11], CIS1[8], CIS1[9], CIS1[10], CIS1[11]);
	nu64(CIS[12], CIS[13], CIS[14], CIS[15], CIS1[12], CIS1[13], CIS1[14], CIS1[15]);
	nu1 = 0x8D8B87787472716C + CIS[2] + CIS[3];
	nu2 = 0x6A696665635C5A59 + CIS[1] + CIS[2] + CIS[3];
	nu2 = rotl64((nu2), 23);
	nu3 = 0x5655534E4D4B473C + CIS[1] + CIS[2];
	nu4 = 0x3A393635332E2D2B + CIS[1] + CIS[3];
	b = 0;
	a = adlen / RATE;
	for (j = 0; j < a; j++) {
		// IS for the triplex component is initialized by the CIS for every AD block
		/* for (i = 0; i < IS_SIZE; i++) {
		IS[i] = CIS[i];
		} */
		IS[0] =  CIS[0];
		IS[1] =  CIS[1];
		IS[2] =  CIS[2];
		IS[3] =  CIS[3];
		IS[4] =  CIS1[4];
		IS[5] =  CIS1[5];
		IS[6] =  CIS1[6];
		IS[7] =  CIS1[7];
		IS[8] =  CIS1[8];
		IS[9] =  CIS1[9];
		IS[10] = CIS1[10];
		IS[11] = CIS1[11];
		IS[12] = CIS1[12];
		IS[13] = CIS1[13];
		IS[14] = CIS1[14];
		IS[15] = CIS1[15];
		ctr++;
		// Inject ctr + j in IS
		IS[0] = IS[0] ^ ctr;
		pi1(IS[0], IS[1], IS[2], IS[3], IS[4], IS[5], IS[6], IS[7], IS[8], IS[9], IS[10], IS[11], IS[12], IS[13], IS[14], IS[15], nu1, nu2, nu3, nu4);
		//pi(IS[0], IS[1], IS[2], IS[3], IS[4], IS[5], IS[6], IS[7], IS[8], IS[9], IS[10], IS[11], IS[12], IS[13], IS[14], IS[15]);
		// process the AD block
		// Inject the AD block
		/* for (i = 0; i < N; i += 2) {
		ii1 = i * WORDS_CHUNK;
		for (i1 = 0; i1 < WORDS_CHUNK; i1++) {
		IS[ii1] = IS[ii1] ^ ad64[b];
		b++;
		ii1++;
		}
		} */
		IS[0] = IS[0] ^ ad64[b];
		b++;
		IS[1] = IS[1] ^ ad64[b];
		b++;
		IS[2] = IS[2] ^ ad64[b];
		b++;
		IS[3] = IS[3] ^ ad64[b];
		b++;
		IS[8] = IS[8] ^ ad64[b];
		b++;
		IS[9] = IS[9] ^ ad64[b];
		b++;
		IS[10] = IS[10] ^ ad64[b];
		b++;
		IS[11] = IS[11] ^ ad64[b];
		b++;
		pi(IS[0], IS[1], IS[2], IS[3], IS[4], IS[5], IS[6], IS[7], IS[8], IS[9], IS[10], IS[11], IS[12], IS[13], IS[14], IS[15]);
		// Collect the tag for this block
		// Sum of the tags componentwise, where the length of one component is W
		/* jj = 0;
		for (i = 0; i < N; i += 2) {
		ii1 = i * WORDS_CHUNK;
		for (i1 = 0; i1 < WORDS_CHUNK; i1++) {
		tempTag[jj] = tempTag[jj] + IS[ii1];
		jj++;
		ii1++;
		}
		} */
		tempTag[0] = tempTag[0] + IS[0];
		tempTag[1] = tempTag[1] + IS[1];
		tempTag[2] = tempTag[2] + IS[2];
		tempTag[3] = tempTag[3] + IS[3];
		tempTag[4] = tempTag[4] + IS[8];
		tempTag[5] = tempTag[5] + IS[9];
		tempTag[6] = tempTag[6] + IS[10];
		tempTag[7] = tempTag[7] + IS[11];
	}
	// if the last AD block is not the full block, we process it byte by byte
	LastADChunkLength = adlen % RATE;
	if (LastADChunkLength) {
		b = b * W;
		i1 = 0;
		/* for (i = 0; i < IS_SIZE; i++) {
		IS[i] = CIS[i];
		} */
		IS[0] =  CIS[0];
		IS[1] =  CIS[1];
		IS[2] =  CIS[2];
		IS[3] =  CIS[3];
		IS[4] =  CIS1[4];
		IS[5] =  CIS1[5];
		IS[6] =  CIS1[6];
		IS[7] =  CIS1[7];
		IS[8] =  CIS1[8];
		IS[9] =  CIS1[9];
		IS[10] = CIS1[10];
		IS[11] = CIS1[11];
		IS[12] = CIS1[12];
		IS[13] = CIS1[13];
		IS[14] = CIS1[14];
		IS[15] = CIS1[15];
		ctr++;
		IS[0] = IS[0] ^ ctr;
		pi1(IS[0], IS[1], IS[2], IS[3], IS[4], IS[5], IS[6], IS[7], IS[8], IS[9], IS[10], IS[11], IS[12], IS[13], IS[14], IS[15], nu1, nu2, nu3, nu4);
		//pi(IS[0], IS[1], IS[2], IS[3], IS[4], IS[5], IS[6], IS[7], IS[8], IS[9], IS[10], IS[11], IS[12], IS[13], IS[14], IS[15]);
		for (i = 0; i < LastADChunkLength; i++) {
			InternalState8[i1] = InternalState8[i1] ^ ad[b];
			i1++;
			if (i1 % (RATE_OUT) == 0) i1 += RATE_OUT;
			b++;
		}
		// padding with 10*
		InternalState8[i1] = InternalState8[i1] ^ 0x01;
		pi(IS[0], IS[1], IS[2], IS[3], IS[4], IS[5], IS[6], IS[7], IS[8], IS[9], IS[10], IS[11], IS[12], IS[13], IS[14], IS[15]);
		//updating the tag
		/* jj = 0;
		for (i = 0; i < N; i += 2) {
		ii1 = i * WORDS_CHUNK;
		for (i1 = 0; i1 < WORDS_CHUNK; i1++) {
		tempTag[jj] = tempTag[jj] + IS[ii1];
		jj++;
		ii1++;
		}
		} */
		tempTag[0] = tempTag[0] + IS[0];
		tempTag[1] = tempTag[1] + IS[1];
		tempTag[2] = tempTag[2] + IS[2];
		tempTag[3] = tempTag[3] + IS[3];
		tempTag[4] = tempTag[4] + IS[8];
		tempTag[5] = tempTag[5] + IS[9];
		tempTag[6] = tempTag[6] + IS[10];
		tempTag[7] = tempTag[7] + IS[11];
	}
	// if the AD is full blocks we still need to append 10* and it is done in an additional block
	else {
		IS[0] =  CIS[0];
		IS[1] =  CIS[1];
		IS[2] =  CIS[2];
		IS[3] =  CIS[3];
		IS[4] =  CIS1[4];
		IS[5] =  CIS1[5];
		IS[6] =  CIS1[6];
		IS[7] =  CIS1[7];
		IS[8] =  CIS1[8];
		IS[9] =  CIS1[9];
		IS[10] = CIS1[10];
		IS[11] = CIS1[11];
		IS[12] = CIS1[12];
		IS[13] = CIS1[13];
		IS[14] = CIS1[14];
		IS[15] = CIS1[15];
		ctr++;
		IS[0] = IS[0] ^ ctr;
		pi1(IS[0], IS[1], IS[2], IS[3], IS[4], IS[5], IS[6], IS[7], IS[8], IS[9], IS[10], IS[11], IS[12], IS[13], IS[14], IS[15], nu1, nu2, nu3, nu4);
		// padding with 10*
		InternalState8[0] = InternalState8[0] ^ 0x01;
		pi(IS[0], IS[1], IS[2], IS[3], IS[4], IS[5], IS[6], IS[7], IS[8], IS[9], IS[10], IS[11], IS[12], IS[13], IS[14], IS[15]);
		//updating the tag
		tempTag[0] = tempTag[0] + IS[0];
		tempTag[1] = tempTag[1] + IS[1];
		tempTag[2] = tempTag[2] + IS[2];
		tempTag[3] = tempTag[3] + IS[3];
		tempTag[4] = tempTag[4] + IS[8];
		tempTag[5] = tempTag[5] + IS[9];
		tempTag[6] = tempTag[6] + IS[10];
		tempTag[7] = tempTag[7] + IS[11];
	}

	// updating the Common Internal State by injection of the tag (tempTag) obtained from the associated data
	/* jj = 0;
	for (i = 0; i < N; i += 2) {
	ii1 = i * WORDS_CHUNK;
	ii2 = (i + 1) * WORDS_CHUNK;
	for (i1 = 0; i1 < WORDS_CHUNK; i1++) {
	IS[ii1] = CIS[ii1] ^ tempTag[jj];
	IS[ii2] = CIS[ii2];
	jj++;
	ii1++;
	ii2++;
	}
	} */
	IS[0] = CIS[0] ^ tempTag[0];
	IS[1] = CIS[1] ^ tempTag[1];
	IS[2] = CIS[2] ^ tempTag[2];
	IS[3] = CIS[3] ^ tempTag[3];
	IS[4] = CIS[4];
	IS[5] = CIS[5];
	IS[6] = CIS[6];
	IS[7] = CIS[7];
	IS[8] = CIS[8] ^ tempTag[4];
	IS[9] = CIS[9] ^ tempTag[5];
	IS[10] = CIS[10] ^ tempTag[6];
	IS[11] = CIS[11] ^ tempTag[7];
	IS[12] = CIS[12];
	IS[13] = CIS[13];
	IS[14] = CIS[14];
	IS[15] = CIS[15];
	pi(IS[0], IS[1], IS[2], IS[3], IS[4], IS[5], IS[6], IS[7], IS[8], IS[9], IS[10], IS[11], IS[12], IS[13], IS[14], IS[15]);
	/* for (i = 0; i < IS_SIZE; i++) {
	CIS[i] = IS[i];
	} */
	CIS[0] = IS[0];
	CIS[1] = IS[1];
	CIS[2] = IS[2];
	CIS[3] = IS[3];
	CIS[4] = IS[4];
	CIS[5] = IS[5];
	CIS[6] = IS[6];
	CIS[7] = IS[7];
	CIS[8] = IS[8];
	CIS[9] = IS[9];
	CIS[10] = IS[10];
	CIS[11] = IS[11];
	CIS[12] = IS[12];
	CIS[13] = IS[13];
	CIS[14] = IS[14];
	CIS[15] = IS[15];

	// phase 3: Processing the secret message number
	if (CRYPTO_NSECBYTES > 0) {
		nu64(CIS[4], CIS[5], CIS[6], CIS[7], CIS[4], CIS[5], CIS[6], CIS[7]);
		nu64(CIS[8], CIS[9], CIS[10], CIS[11], CIS[8], CIS[9], CIS[10], CIS[11]);
		nu64(CIS[12], CIS[13], CIS[14], CIS[15], CIS[12], CIS[13], CIS[14], CIS[15]);
		nu1 = 0x8D8B87787472716C + CIS[2] + CIS[3];
		nu2 = 0x6A696665635C5A59 + CIS[1] + CIS[2] + CIS[3];
		nu2 = rotl64((nu2), 23);
		nu3 = 0x5655534E4D4B473C + CIS[1] + CIS[2];
		nu4 = 0x3A393635332E2D2B + CIS[1] + CIS[3];
		/* for (i = 0; i < IS_SIZE; i++) {
		IS[i] = CIS[i];
		} */
		IS[0] = CIS[0];
		IS[1] = CIS[1];
		IS[2] = CIS[2];
		IS[3] = CIS[3];
		IS[4] = CIS[4];
		IS[5] = CIS[5];
		IS[6] = CIS[6];
		IS[7] = CIS[7];
		IS[8] = CIS[8];
		IS[9] = CIS[9];
		IS[10] = CIS[10];
		IS[11] = CIS[11];
		IS[12] = CIS[12];
		IS[13] = CIS[13];
		IS[14] = CIS[14];
		IS[15] = CIS[15];
		ctr++;
		IS[0] = IS[0] ^ ctr;
		pi1(IS[0], IS[1], IS[2], IS[3], IS[4], IS[5], IS[6], IS[7], IS[8], IS[9], IS[10], IS[11], IS[12], IS[13], IS[14], IS[15], nu1, nu2, nu3, nu4);
		//pi(IS[0], IS[1], IS[2], IS[3], IS[4], IS[5], IS[6], IS[7], IS[8], IS[9], IS[10], IS[11], IS[12], IS[13], IS[14], IS[15]);
		// decrypt the SMN
		// Inject the SMN
		b = 0;
		/* for (i = 0; i < N; i += 2) {
		ii1 = i * WORDS_CHUNK;
		for (i1 = 0; i1 < WORDS_CHUNK; i1++) {
		nsec64[b] = IS[ii1] ^ c64[b];
		IS[ii1] = c64[b];
		b++;
		ii1++;
		}
		} */
		nsec64[b] = IS[0] ^ c64[b];
		IS[0] = c64[b];
		b++;
		nsec64[b] = IS[1] ^ c64[b];
		IS[1] = c64[b];
		b++;
		nsec64[b] = IS[2] ^ c64[b];
		IS[2] = c64[b];
		b++;
		nsec64[b] = IS[3] ^ c64[b];
		IS[3] = c64[b];
		b++;
		nsec64[b] = IS[8] ^ c64[b];
		IS[8] = c64[b];
		b++;
		nsec64[b] = IS[9] ^ c64[b];
		IS[9] = c64[b];
		b++;
		nsec64[b] = IS[10] ^ c64[b];
		IS[10] = c64[b];
		b++;
		nsec64[b] = IS[11] ^ c64[b];
		IS[11] = c64[b];
		pi(IS[0], IS[1], IS[2], IS[3], IS[4], IS[5], IS[6], IS[7], IS[8], IS[9], IS[10], IS[11], IS[12], IS[13], IS[14], IS[15]);

		// updating the Common Internal State after decrypting the SMN
		/* for (i = 0; i < IS_SIZE; i++) {
		CIS[i] = IS[i];
		} */
		CIS[0] = IS[0];
		CIS[1] = IS[1];
		CIS[2] = IS[2];
		CIS[3] = IS[3];
		CIS[4] = IS[4];
		CIS[5] = IS[5];
		CIS[6] = IS[6];
		CIS[7] = IS[7];
		CIS[8] = IS[8];
		CIS[9] = IS[9];
		CIS[10] = IS[10];
		CIS[11] = IS[11];
		CIS[12] = IS[12];
		CIS[13] = IS[13];
		CIS[14] = IS[14];
		CIS[15] = IS[15];
		// Collect the tag from this encryption and update the tempTag
		/* jj = 0;
		for (i = 0; i < N; i += 2) {
		ii1 = i * WORDS_CHUNK;
		for (i1 = 0; i1 < WORDS_CHUNK; i1++) {
		tempTag[jj] = tempTag[jj] + IS[ii1];
		jj++;
		ii1++;
		}
		} */
		tempTag[0] = tempTag[0] + IS[0];
		tempTag[1] = tempTag[1] + IS[1];
		tempTag[2] = tempTag[2] + IS[2];
		tempTag[3] = tempTag[3] + IS[3];
		tempTag[4] = tempTag[4] + IS[8];
		tempTag[5] = tempTag[5] + IS[9];
		tempTag[6] = tempTag[6] + IS[10];
		tempTag[7] = tempTag[7] + IS[11];
	}

	//phase 4: Processing the ciphertext
	nu64(CIS[4], CIS[5], CIS[6], CIS[7], CIS[4], CIS[5], CIS[6], CIS[7]);
	nu64(CIS[8], CIS[9], CIS[10], CIS[11], CIS[8], CIS[9], CIS[10], CIS[11]);
	nu64(CIS[12], CIS[13], CIS[14], CIS[15], CIS[12], CIS[13], CIS[14], CIS[15]);
	nu1 = 0x8D8B87787472716C + CIS[2] + CIS[3];
	nu2 = 0x6A696665635C5A59 + CIS[1] + CIS[2] + CIS[3];
	nu2 = rotl64((nu2), 23);
	nu3 = 0x5655534E4D4B473C + CIS[1] + CIS[2];
	nu4 = 0x3A393635332E2D2B + CIS[1] + CIS[3];
	cblocks = (clen - CRYPTO_ABYTES - CRYPTO_NSECBYTES) / RATE;
	b = 0;
	b1 = bSMN;
	for (j = 0; j < cblocks; j++) {
		/* for (i = 0; i < IS_SIZE; i++) {
		IS[i] = CIS[i];
		} */
		IS[0] = CIS[0];
		IS[1] = CIS[1];
		IS[2] = CIS[2];
		IS[3] = CIS[3];
		IS[4] = CIS[4];
		IS[5] = CIS[5];
		IS[6] = CIS[6];
		IS[7] = CIS[7];
		IS[8] = CIS[8];
		IS[9] = CIS[9];
		IS[10] = CIS[10];
		IS[11] = CIS[11];
		IS[12] = CIS[12];
		IS[13] = CIS[13];
		IS[14] = CIS[14];
		IS[15] = CIS[15];
		ctr++;
		IS[0] = IS[0] ^ ctr;
		pi1(IS[0], IS[1], IS[2], IS[3], IS[4], IS[5], IS[6], IS[7], IS[8], IS[9], IS[10], IS[11], IS[12], IS[13], IS[14], IS[15], nu1, nu2, nu3, nu4);
		//pi(IS[0], IS[1], IS[2], IS[3], IS[4], IS[5], IS[6], IS[7], IS[8], IS[9], IS[10], IS[11], IS[12], IS[13], IS[14], IS[15]);
		// decrypt the ciphertext c
		/* for (i = 0; i < N; i += 2) {
		ii1 = i * WORDS_CHUNK;
		for (i1 = 0; i1 < WORDS_CHUNK; i1++) {
		// XOR the IS_bitrate (InternalState[0], InternalState[2], ...) with the c to obtain m
		m64[b] = IS[ii1] ^ c64[b1];
		// in order to proceed for tag computation, put the ciphertext data in the InternalState
		IS[ii1] = c64[b1];
		b++;
		b1++;
		ii1++;
		}
		} */
		m64[b] = IS[0] ^ c64[b1];
		IS[0] = c64[b1];
		b++;
		b1++;
		m64[b] = IS[1] ^ c64[b1];
		IS[1] = c64[b1];
		b++;
		b1++;
		m64[b] = IS[2] ^ c64[b1];
		IS[2] = c64[b1];
		b++;
		b1++;
		m64[b] = IS[3] ^ c64[b1];
		IS[3] = c64[b1];
		b++;
		b1++;
		m64[b] = IS[8] ^ c64[b1];
		IS[8] = c64[b1];
		b++;
		b1++;
		m64[b] = IS[9] ^ c64[b1];
		IS[9] = c64[b1];
		b++;
		b1++;
		m64[b] = IS[10] ^ c64[b1];
		IS[10] = c64[b1];
		b++;
		b1++;
		m64[b] = IS[11] ^ c64[b1];
		IS[11] = c64[b1];
		b++;
		b1++;
		pi(IS[0], IS[1], IS[2], IS[3], IS[4], IS[5], IS[6], IS[7], IS[8], IS[9], IS[10], IS[11], IS[12], IS[13], IS[14], IS[15]);
		// Collect the tag from this decryption and update the tempTag
		/* jj = 0;
		for (i = 0; i < N; i += 2) {
		ii1 = i * WORDS_CHUNK;
		for (i1 = 0; i1 < WORDS_CHUNK; i1++) {
		tempTag[jj] = tempTag[jj] + IS[ii1];
		jj++;
		ii1++;
		}
		} */
		tempTag[0] = tempTag[0] + IS[0];
		tempTag[1] = tempTag[1] + IS[1];
		tempTag[2] = tempTag[2] + IS[2];
		tempTag[3] = tempTag[3] + IS[3];
		tempTag[4] = tempTag[4] + IS[8];
		tempTag[5] = tempTag[5] + IS[9];
		tempTag[6] = tempTag[6] + IS[10];
		tempTag[7] = tempTag[7] + IS[11];
	}
	// if the last ciphertext block is not the full block, we process it byte by byte
	LastMessageChunkLength = (clen - CRYPTO_ABYTES - CRYPTO_NSECBYTES) % RATE;
	if (LastMessageChunkLength) {
		b = b * W;
		b1 = CRYPTO_NSECBYTES + b;
		i1 = 0;
		/* for (i = 0; i < IS_SIZE; i++) {
		IS[i] = CIS[i];
		} */
		IS[0] = CIS[0];
		IS[1] = CIS[1];
		IS[2] = CIS[2];
		IS[3] = CIS[3];
		IS[4] = CIS[4];
		IS[5] = CIS[5];
		IS[6] = CIS[6];
		IS[7] = CIS[7];
		IS[8] = CIS[8];
		IS[9] = CIS[9];
		IS[10] = CIS[10];
		IS[11] = CIS[11];
		IS[12] = CIS[12];
		IS[13] = CIS[13];
		IS[14] = CIS[14];
		IS[15] = CIS[15];
		ctr++;
		IS[0] = IS[0] ^ ctr;
		pi1(IS[0], IS[1], IS[2], IS[3], IS[4], IS[5], IS[6], IS[7], IS[8], IS[9], IS[10], IS[11], IS[12], IS[13], IS[14], IS[15], nu1, nu2, nu3, nu4);
		//pi(IS[0], IS[1], IS[2], IS[3], IS[4], IS[5], IS[6], IS[7], IS[8], IS[9], IS[10], IS[11], IS[12], IS[13], IS[14], IS[15]);
		for (i = 0; i < LastMessageChunkLength; i++) {
			m[b] = InternalState8[i1] ^ c[b1];
			InternalState8[i1] = c[b1];
			i1++;
			if (i1 % (RATE_OUT) == 0) i1 += RATE_OUT;
			b++;
			b1++;
		}
		// padding with 10*
		InternalState8[i1] = InternalState8[i1] ^ 0x01;
		pi(IS[0], IS[1], IS[2], IS[3], IS[4], IS[5], IS[6], IS[7], IS[8], IS[9], IS[10], IS[11], IS[12], IS[13], IS[14], IS[15]);
		// updating the tag
		/* jj = 0;
		for (i = 0; i < N; i += 2) {
		ii1 = i * WORDS_CHUNK;
		for (i1 = 0; i1 < WORDS_CHUNK; i1++) {
		tempTag[jj] = tempTag[jj] + IS[ii1];
		jj++;
		ii1++;
		}
		} */
		tempTag[0] = tempTag[0] + IS[0];
		tempTag[1] = tempTag[1] + IS[1];
		tempTag[2] = tempTag[2] + IS[2];
		tempTag[3] = tempTag[3] + IS[3];
		tempTag[4] = tempTag[4] + IS[8];
		tempTag[5] = tempTag[5] + IS[9];
		tempTag[6] = tempTag[6] + IS[10];
		tempTag[7] = tempTag[7] + IS[11];
	}
	// If the message consists only of full blocks, the 10* padding must still be appended; it is applied in an additional block
	else {
		IS[0] =  CIS[0];
		IS[1] =  CIS[1];
		IS[2] =  CIS[2];
		IS[3] =  CIS[3];
		IS[4] =  CIS1[4];
		IS[5] =  CIS1[5];
		IS[6] =  CIS1[6];
		IS[7] =  CIS1[7];
		IS[8] =  CIS1[8];
		IS[9] =  CIS1[9];
		IS[10] = CIS1[10];
		IS[11] = CIS1[11];
		IS[12] = CIS1[12];
		IS[13] = CIS1[13];
		IS[14] = CIS1[14];
		IS[15] = CIS1[15];
		ctr++;
		IS[0] = IS[0] ^ ctr;
		pi1(IS[0], IS[1], IS[2], IS[3], IS[4], IS[5], IS[6], IS[7], IS[8], IS[9], IS[10], IS[11], IS[12], IS[13], IS[14], IS[15], nu1, nu2, nu3, nu4);
		// padding with 10*
		InternalState8[0] = InternalState8[0] ^ 0x01;
		pi(IS[0], IS[1], IS[2], IS[3], IS[4], IS[5], IS[6], IS[7], IS[8], IS[9], IS[10], IS[11], IS[12], IS[13], IS[14], IS[15]);
		//updating the tag
		tempTag[0] = tempTag[0] + IS[0];
		tempTag[1] = tempTag[1] + IS[1];
		tempTag[2] = tempTag[2] + IS[2];
		tempTag[3] = tempTag[3] + IS[3];
		tempTag[4] = tempTag[4] + IS[8];
		tempTag[5] = tempTag[5] + IS[9];
		tempTag[6] = tempTag[6] + IS[10];
		tempTag[7] = tempTag[7] + IS[11];
	}

	//updating the length of the message
	*mlen = clen - CRYPTO_ABYTES - CRYPTO_NSECBYTES;

	// tag verification
	b = (*mlen + CRYPTO_NSECBYTES);
	for (ii = b; ii < clen; ii++) {
		if (c[ii] != tempTag8[ii - b])
			return -1;
	}

	return 0;
}