예제 #1
0
void speck128_encryptx(const void *key, void *in)
{
  uint32_t i, t, k0, k1, k2, k3, x0, x1;
  bc_blk   *x=(bc_blk*)in;
  bc_blk   *k=(bc_blk*)key;
  
  // copy 128-bit key to local registers
  k0 = k->w[0]; k1 = k->w[1];
  k2 = k->w[2]; k3 = k->w[3];
  
  // copy M to local space
  x0 = x->w[0]; x1 = x->w[1];
  
  for (i=0; i<27; i++)
  {
    // encrypt block
    x0 = (ROTR32(x0, 8) + x1) ^ k0;
    x1 =  ROTL32(x1, 3) ^ x0;
    
    // create next subkey
    k1 = (ROTR32(k1, 8) + k0) ^ i;
    k0 =  ROTL32(k0, 3) ^ k1;
    
    XCHG(k3, k2, t);
    XCHG(k3, k1, t);    
  }
  // save result
  x->w[0] = x0; x->w[1] = x1;
}
예제 #2
0
void rc5_dec(void *buffer, const rc5_ctx_t *ctx){
	uint8_t i;
	for(i=ctx->rounds; i>0; --i){
		B = ROTR32(B - ctx->s[i*2+1], A&31) ^ A;
		A = ROTR32(A - ctx->s[i*2+0], B&31) ^ B;
	} 
	B -= ctx->s[1];
	A -= ctx->s[0];
}
예제 #3
0
파일: xoodoo3.c 프로젝트: odzhan/tinycrypt
void xoodoo(void *state) {
    uint32_t e[4], a0, a1, a2, a3, t;
    int      i, x, y, j;
    uint32_t *p, *q;
    uint32_t (*A)[4] = (uint32_t(*)[4])state; 
    
    p=(uint32_t*)A[0];
      
    // 12 rounds by default
    for (i=0; i<12; i++) {
      // θ
      for (x=0; x<4; x++) {
        e[x] = ROTR32(p[x] ^ p[x+4] ^ p[x+8], 18);
        e[x]^= ROTR32(e[x], 9);
      }

      for (x=0; x<12; x++) {
        p[x] ^= e[(x - 1) & 3];
      }

      // ρ_west
      XCHG(p[7], p[4]);
      XCHG(p[7], p[5]);
      XCHG(p[7], p[6]);

      for (x=0; x<4; x++) {
        p[x+8] = ROTR32(p[x+8], 21); 
      }
  
      // ι
      p[0] ^= (uint32_t)rc[i];

      // χ
      for (x=0; x<4; x++) {
        a0 = p[x+0];
        a1 = p[x+4];
        a2 = p[x+8];

        p[x+0] ^= ~a1 & a2;        
        p[x+4] ^= ~a2 & a0;        
        p[x+8] ^= ~a0 & a1;                
      }

      // ******
      // ρ_east
      // ******
      for (x=0;x<4; x++) {       
        p[x+4] = ROTR32(p[x+4], 31); 
        p[x+8] = ROTR32(p[x+8], 24);
      } 
      XCHG(p[8], p[10]);
      XCHG(p[9], p[11]); 
    }
}
예제 #4
0
파일: rc5z.c 프로젝트: odzhan/tinycrypt
void rc5_encryptx(void *key, void *data)
{
    uint32_t A, B, t;
    uint32_t L[4], S[RC5_KR];
    int      i, j,r;    
    
    uint32_t *k=(uint32_t*)key;
    uint32_t *x=(uint32_t*)data;
    
    // initialize L with 128-bit key
    memcpy(&L, key, 16);

    A = 0xC983AE2D; //RC5_P;
    
    // initialize S with constants
    for (i=RC5_KR-1; i>=0; i--) {
      A -= RC5_Q;     
      S[i] = A;
    }
    
    A = B = 0;
    r = (RC5_KR*3) - 1;
    
    // create subkeys
    for (i=0, j=0; r>0; i++, j++, r--) {
      // i needs reset?
      if (i==RC5_KR) i=0;
      
      A = S[i%RC5_KR] = ROTR32(S[i%RC5_KR] + A+B, 32-3);  
      B = L[j%4] = ROTR32(L[j%4] + A+B, 32-U8V(A+B));
    }
    
    // assign subkeys to k ptr 
    k = S;
    
    // load plaintext
    A = x[0] + *k; k++; 
    B = x[1] + *k; k++;

    // apply encryption
    for (i=RC5_KR-2; i>0; i--) {
      // original spec uses ROTL32
      A = ROTR32(A ^ B, 32 - U8V(B)) + *k; k++;   
      // rotate
      XCHG(A, B);
    }
    // save
    x[0] = A; x[1] = B;
}
예제 #5
0
파일: xoodoo3.c 프로젝트: odzhan/tinycrypt
void shift(uint32_t A[3][4], int d) {
    uint32_t  *p=(uint32_t*)A[1];
    uint32_t  i, j, x, r;
    uint32_t  a[4];

    i = -1;
    r = 0x1500;
    x = 1;
    
    if (d) {
      i = 2;
      r = 0x181F;
      x = 0;
    }

    do {
      for (j=0; j<4; j++) {
        a[j] = ROTR32(p[(j - x) & 3], r & 0xFF); 
      }
      memcpy(p, a, 16);
      p += 4;
      x += i;
      r >>= 8;
    } while (r != 0); 
}*/
예제 #6
0
static
void theta(const uint32_t *k, uint32_t *a){
	uint32_t temp;

	temp = a[0] ^ a[2]; temp ^= ROTR32(temp, 8) ^ ROTL32(temp, 8);
	a[1] ^= temp;
	a[3] ^= temp;
	
	a[0] ^= k[0];
	a[1] ^= k[1];
	a[2] ^= k[2];
	a[3] ^= k[3];

	temp = a[1] ^ a[3]; temp ^= ROTR32(temp, 8) ^ ROTL32(temp, 8);
	a[0] ^= temp;
	a[2] ^= temp;	

}
static INLINE void
tkip_micblock(uint32 *left, uint32 *right)
{
	uint32 l = *left;
	uint32 r = *right;

	r ^= ROTR32(l, 15);
	l += r; 
	r ^= XSWAP32(l);
	l += r; 
	r ^= ROTR32(l, 29);
	l += r; 
	r ^= ROTR32(l, 2);
	l += r; 

	*left = l;
	*right = r;
}
예제 #8
0
static
void blake_small_compress(uint32_t *v, const void *m)
{
    uint8_t r, i;
    uint8_t a, b, c, d, s0, s1, sigma_idx = 0;
    uint32_t lv[4];
    for (r = 0; r < 14; ++r) {
        for (i = 0; i < 8; ++i) {
            a = pgm_read_byte(blake_index_lut + 4 * i + 0);
            b = pgm_read_byte(blake_index_lut + 4 * i + 1);
            c = pgm_read_byte(blake_index_lut + 4 * i + 2);
            d = pgm_read_byte(blake_index_lut + 4 * i + 3);
            s0 = pgm_read_byte(blake_sigma + sigma_idx);
            s1 = s0 & 0xf;
            s0 >>= 4;
            ++sigma_idx;
            if (sigma_idx >= 80) {
                sigma_idx -= 80;
            }
            lv[0] = v[a];
            lv[1] = v[b];
            lv[2] = v[c];
            lv[3] = v[d];

            lv[0] += lv[1]
                    + (((uint32_t*) m)[s0] ^ pgm_read_dword(&(blake_c[s1])));
            lv[3] = ROTR32(lv[3] ^ lv[0], 16);
            lv[2] += lv[3];
            lv[1] = ROTR32(lv[1] ^ lv[2], 12);
            lv[0] += lv[1]
                    + (((uint32_t*) m)[s1] ^ pgm_read_dword(&(blake_c[s0])));
            lv[3] = ROTR32(lv[3] ^ lv[0], 8);
            lv[2] += lv[3];
            lv[1] = ROTR32(lv[1] ^ lv[2], 7);

            v[a] = lv[0];
            v[b] = lv[1];
            v[c] = lv[2];
            v[d] = lv[3];
        }
    }
}
예제 #9
0
/*
 * "Michael" Messge Integrity Check (MIC) algorithm
 */
static INLINE void
tkip_micblock(uint32 *left, uint32 *right)
{
	uint32 l = *left;
	uint32 r = *right;

	/*
	 * Per Henry, we replaced the ROTL with ROTR
	 */
	r ^= ROTR32(l, 15);
	l += r; /* mod 2^32 */
	r ^= XSWAP32(l);
	l += r; /* mod 2^32 */
	r ^= ROTR32(l, 29);
	l += r; /* mod 2^32 */
	r ^= ROTR32(l, 2);
	l += r; /* mod 2^32 */

	*left = l;
	*right = r;
}
예제 #10
0
파일: gimli.c 프로젝트: odzhan/tinycrypt
void gimlix(void *state)
{
    int      r, j;
    uint32_t t, x, y, z;
    uint32_t *s=(uint32_t*)state;

    for (r=24; r>0; --r) {
      // apply SP-box
      for (j=0; j<4; j++) {
        x = ROTR32(s[    j],  8);
        y = ROTR32(s[4 + j], 23);
        z =        s[8 + j];

        s[8 + j] = x ^ (z << 1) ^ ((y & z) << 2);
        s[4 + j] = y ^ x        ^ ((x | z) << 1);
        s[j]     = z ^ y        ^ ((x & y) << 3);
      }

      // apply Linear layer
      t = r & 3;

      // if zero, do small swap
      if (t == 0) {
        XCHG(s[0], s[1]);
        XCHG(s[2], s[3]);

        // add constant
        s[0] ^= (0x9e377900 | r);
      }
      // if 2, do big swap
      if (t == 2) {
        XCHG(s[0], s[2]);
        XCHG(s[1], s[3]);
      }
    }
}
예제 #11
0
파일: cham.c 프로젝트: odzhan/tinycrypt
void cham(void *key, void *data)
{
    int      i;
    uint32_t x0, x1, x2, x3, k0, k1, k2, t0;
    uint32_t rk[2*KW];
    uint32_t *x, *k;

    k = (uint32_t*)key;
    x = (uint32_t*)data;

    // derive round keys from 128-bit key
    for (i=0; i<KW; i++) {
      k0    = k[i];
      k1    = ROTR32(k0, 31);
      k2    = ROTR32(k0, 24);
      k0   ^= k1;
      rk[i] = k0 ^ k2;
      k2    = ROTR32(k2, 29);
      k0   ^= k2;
      rk[(i+KW)^1] = k0;
    }

    // load 128-bit plain text
    x0 = x[0]; x1 = x[1];
    x2 = x[2]; x3 = x[3];

    // perform encryption
    for (i=0; i<R; i++)
    {
      k0 = x3;   // backup x3
      x0^= i;    // xor by round number

      // execution depends on (i % 2)
      x3 = rk[i & 7];
      x3^= (i & 1) ? ROTR32(x1, 24) : ROTR32(x1, 31);

      x3+= x0;
      x3 = (i & 1) ? ROTR32(x3, 31) : ROTR32(x3, 24);

      // swap
      x0 = x1; x1 = x2; x2 = k0;
    }
    x[0] = x0; x[1] = x1;
    x[2] = x2; x[3] = x3;
}
예제 #12
0
static
void pi2(uint32_t *a){
	a[1] = ROTR32(a[1], 1);
	a[2] = ROTR32(a[2], 5);
	a[3] = ROTR32(a[3], 2);
}
예제 #13
0
/* Buffer mixer (compressor) */
static void blake2s_compress(blake2s_state *S) {
    uint *v = (uint *) S->tempbuf;
    uint *m = (uint *) S->buf;
    register uint t0, t1, t2, t3;

    v[0]  = S->h[0];
    v[1]  = S->h[1];
    v[2]  = S->h[2];
    v[3]  = S->h[3];
    v[4]  = S->h[4];
    v[5]  = S->h[5];
    v[6]  = S->h[6];
    v[7]  = S->h[7];
    v[8]  = blake2s_IV[0];
    v[9]  = blake2s_IV[1];
    v[10] = blake2s_IV[2];
    v[11] = blake2s_IV[3];
    v[12] = S->t[0] ^ blake2s_IV[4];
    v[13] = S->t[1] ^ blake2s_IV[5];
    v[14] = S->f[0] ^ blake2s_IV[6];
    v[15] = S->f[1] ^ blake2s_IV[7];

/* Round 0 */
    t0 = v[0];
    t1 = v[4];
    t0 = t0 + t1 + m[0];
    t3 = v[12];
    t2 = v[8];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[1];
    v[0] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[12] = t3;
    t2 = t2 + t3;
    v[8] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[4] = t1;

    t0 = v[1];
    t1 = v[5];
    t0 = t0 + t1 + m[2];
    t3 = v[13];
    t2 = v[9];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[3];
    v[1] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[13] = t3;
    t2 = t2 + t3;
    v[9] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[5] = t1;

    t0 = v[2];
    t1 = v[6];
    t0 = t0 + t1 + m[4];
    t3 = v[14];
    t2 = v[10];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[5];
    v[2] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[14] = t3;
    t2 = t2 + t3;
    v[10] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[6] = t1;

    t0 = v[3];
    t1 = v[7];
    t0 = t0 + t1 + m[6];
    t3 = v[15];
    t2 = v[11];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[7];
    v[3] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[15] = t3;
    t2 = t2 + t3;
    v[11] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[7] = t1;

    t0 = v[0];
    t1 = v[5];
    t0 = t0 + t1 + m[8];
    t3 = v[15];
    t2 = v[10];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[9];
    v[0] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[15] = t3;
    t2 = t2 + t3;
    v[10] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[5] = t1;

    t0 = v[1];
    t1 = v[6];
    t0 = t0 + t1 + m[10];
    t3 = v[12];
    t2 = v[11];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[11];
    v[1] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[12] = t3;
    t2 = t2 + t3;
    v[11] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[6] = t1;

    t0 = v[2];
    t1 = v[7];
    t0 = t0 + t1 + m[12];
    t3 = v[13];
    t2 = v[8];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[13];
    v[2] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[13] = t3;
    t2 = t2 + t3;
    v[8] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[7] = t1;

    t0 = v[3];
    t1 = v[4];
    t0 = t0 + t1 + m[14];
    t3 = v[14];
    t2 = v[9];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[15];
    v[3] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[14] = t3;
    t2 = t2 + t3;
    v[9] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[4] = t1;

/* Round 1 */
    t0 = v[0];
    t1 = v[4];
    t0 = t0 + t1 + m[14];
    t3 = v[12];
    t2 = v[8];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[10];
    v[0] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[12] = t3;
    t2 = t2 + t3;
    v[8] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[4] = t1;

    t0 = v[1];
    t1 = v[5];
    t0 = t0 + t1 + m[4];
    t3 = v[13];
    t2 = v[9];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[8];
    v[1] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[13] = t3;
    t2 = t2 + t3;
    v[9] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[5] = t1;

    t0 = v[2];
    t1 = v[6];
    t0 = t0 + t1 + m[9];
    t3 = v[14];
    t2 = v[10];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[15];
    v[2] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[14] = t3;
    t2 = t2 + t3;
    v[10] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[6] = t1;

    t0 = v[3];
    t1 = v[7];
    t0 = t0 + t1 + m[13];
    t3 = v[15];
    t2 = v[11];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[6];
    v[3] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[15] = t3;
    t2 = t2 + t3;
    v[11] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[7] = t1;

    t0 = v[0];
    t1 = v[5];
    t0 = t0 + t1 + m[1];
    t3 = v[15];
    t2 = v[10];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[12];
    v[0] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[15] = t3;
    t2 = t2 + t3;
    v[10] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[5] = t1;

    t0 = v[1];
    t1 = v[6];
    t0 = t0 + t1 + m[0];
    t3 = v[12];
    t2 = v[11];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[2];
    v[1] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[12] = t3;
    t2 = t2 + t3;
    v[11] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[6] = t1;

    t0 = v[2];
    t1 = v[7];
    t0 = t0 + t1 + m[11];
    t3 = v[13];
    t2 = v[8];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[7];
    v[2] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[13] = t3;
    t2 = t2 + t3;
    v[8] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[7] = t1;

    t0 = v[3];
    t1 = v[4];
    t0 = t0 + t1 + m[5];
    t3 = v[14];
    t2 = v[9];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[3];
    v[3] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[14] = t3;
    t2 = t2 + t3;
    v[9] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[4] = t1;

/* Round 2 */
    t0 = v[0];
    t1 = v[4];
    t0 = t0 + t1 + m[11];
    t3 = v[12];
    t2 = v[8];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[8];
    v[0] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[12] = t3;
    t2 = t2 + t3;
    v[8] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[4] = t1;

    t0 = v[1];
    t1 = v[5];
    t0 = t0 + t1 + m[12];
    t3 = v[13];
    t2 = v[9];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[0];
    v[1] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[13] = t3;
    t2 = t2 + t3;
    v[9] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[5] = t1;

    t0 = v[2];
    t1 = v[6];
    t0 = t0 + t1 + m[5];
    t3 = v[14];
    t2 = v[10];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[2];
    v[2] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[14] = t3;
    t2 = t2 + t3;
    v[10] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[6] = t1;

    t0 = v[3];
    t1 = v[7];
    t0 = t0 + t1 + m[15];
    t3 = v[15];
    t2 = v[11];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[13];
    v[3] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[15] = t3;
    t2 = t2 + t3;
    v[11] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[7] = t1;

    t0 = v[0];
    t1 = v[5];
    t0 = t0 + t1 + m[10];
    t3 = v[15];
    t2 = v[10];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[14];
    v[0] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[15] = t3;
    t2 = t2 + t3;
    v[10] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[5] = t1;

    t0 = v[1];
    t1 = v[6];
    t0 = t0 + t1 + m[3];
    t3 = v[12];
    t2 = v[11];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[6];
    v[1] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[12] = t3;
    t2 = t2 + t3;
    v[11] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[6] = t1;

    t0 = v[2];
    t1 = v[7];
    t0 = t0 + t1 + m[7];
    t3 = v[13];
    t2 = v[8];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[1];
    v[2] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[13] = t3;
    t2 = t2 + t3;
    v[8] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[7] = t1;

    t0 = v[3];
    t1 = v[4];
    t0 = t0 + t1 + m[9];
    t3 = v[14];
    t2 = v[9];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[4];
    v[3] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[14] = t3;
    t2 = t2 + t3;
    v[9] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[4] = t1;

/* Round 3 */
    t0 = v[0];
    t1 = v[4];
    t0 = t0 + t1 + m[7];
    t3 = v[12];
    t2 = v[8];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[9];
    v[0] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[12] = t3;
    t2 = t2 + t3;
    v[8] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[4] = t1;

    t0 = v[1];
    t1 = v[5];
    t0 = t0 + t1 + m[3];
    t3 = v[13];
    t2 = v[9];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[1];
    v[1] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[13] = t3;
    t2 = t2 + t3;
    v[9] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[5] = t1;

    t0 = v[2];
    t1 = v[6];
    t0 = t0 + t1 + m[13];
    t3 = v[14];
    t2 = v[10];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[12];
    v[2] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[14] = t3;
    t2 = t2 + t3;
    v[10] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[6] = t1;

    t0 = v[3];
    t1 = v[7];
    t0 = t0 + t1 + m[11];
    t3 = v[15];
    t2 = v[11];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[14];
    v[3] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[15] = t3;
    t2 = t2 + t3;
    v[11] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[7] = t1;

    t0 = v[0];
    t1 = v[5];
    t0 = t0 + t1 + m[2];
    t3 = v[15];
    t2 = v[10];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[6];
    v[0] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[15] = t3;
    t2 = t2 + t3;
    v[10] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[5] = t1;

    t0 = v[1];
    t1 = v[6];
    t0 = t0 + t1 + m[5];
    t3 = v[12];
    t2 = v[11];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[10];
    v[1] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[12] = t3;
    t2 = t2 + t3;
    v[11] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[6] = t1;

    t0 = v[2];
    t1 = v[7];
    t0 = t0 + t1 + m[4];
    t3 = v[13];
    t2 = v[8];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[0];
    v[2] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[13] = t3;
    t2 = t2 + t3;
    v[8] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[7] = t1;

    t0 = v[3];
    t1 = v[4];
    t0 = t0 + t1 + m[15];
    t3 = v[14];
    t2 = v[9];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[8];
    v[3] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[14] = t3;
    t2 = t2 + t3;
    v[9] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[4] = t1;

/* Round 4 */
    t0 = v[0];
    t1 = v[4];
    t0 = t0 + t1 + m[9];
    t3 = v[12];
    t2 = v[8];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[0];
    v[0] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[12] = t3;
    t2 = t2 + t3;
    v[8] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[4] = t1;

    t0 = v[1];
    t1 = v[5];
    t0 = t0 + t1 + m[5];
    t3 = v[13];
    t2 = v[9];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[7];
    v[1] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[13] = t3;
    t2 = t2 + t3;
    v[9] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[5] = t1;

    t0 = v[2];
    t1 = v[6];
    t0 = t0 + t1 + m[2];
    t3 = v[14];
    t2 = v[10];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[4];
    v[2] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[14] = t3;
    t2 = t2 + t3;
    v[10] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[6] = t1;

    t0 = v[3];
    t1 = v[7];
    t0 = t0 + t1 + m[10];
    t3 = v[15];
    t2 = v[11];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[15];
    v[3] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[15] = t3;
    t2 = t2 + t3;
    v[11] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[7] = t1;

    t0 = v[0];
    t1 = v[5];
    t0 = t0 + t1 + m[14];
    t3 = v[15];
    t2 = v[10];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[1];
    v[0] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[15] = t3;
    t2 = t2 + t3;
    v[10] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[5] = t1;

    t0 = v[1];
    t1 = v[6];
    t0 = t0 + t1 + m[11];
    t3 = v[12];
    t2 = v[11];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[12];
    v[1] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[12] = t3;
    t2 = t2 + t3;
    v[11] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[6] = t1;

    t0 = v[2];
    t1 = v[7];
    t0 = t0 + t1 + m[6];
    t3 = v[13];
    t2 = v[8];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[8];
    v[2] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[13] = t3;
    t2 = t2 + t3;
    v[8] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[7] = t1;

    t0 = v[3];
    t1 = v[4];
    t0 = t0 + t1 + m[3];
    t3 = v[14];
    t2 = v[9];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[13];
    v[3] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[14] = t3;
    t2 = t2 + t3;
    v[9] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[4] = t1;

/* Round 5 */
    t0 = v[0];
    t1 = v[4];
    t0 = t0 + t1 + m[2];
    t3 = v[12];
    t2 = v[8];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[12];
    v[0] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[12] = t3;
    t2 = t2 + t3;
    v[8] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[4] = t1;

    t0 = v[1];
    t1 = v[5];
    t0 = t0 + t1 + m[6];
    t3 = v[13];
    t2 = v[9];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[10];
    v[1] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[13] = t3;
    t2 = t2 + t3;
    v[9] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[5] = t1;

    t0 = v[2];
    t1 = v[6];
    t0 = t0 + t1 + m[0];
    t3 = v[14];
    t2 = v[10];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[11];
    v[2] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[14] = t3;
    t2 = t2 + t3;
    v[10] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[6] = t1;

    t0 = v[3];
    t1 = v[7];
    t0 = t0 + t1 + m[8];
    t3 = v[15];
    t2 = v[11];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[3];
    v[3] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[15] = t3;
    t2 = t2 + t3;
    v[11] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[7] = t1;

    t0 = v[0];
    t1 = v[5];
    t0 = t0 + t1 + m[4];
    t3 = v[15];
    t2 = v[10];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[13];
    v[0] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[15] = t3;
    t2 = t2 + t3;
    v[10] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[5] = t1;

    t0 = v[1];
    t1 = v[6];
    t0 = t0 + t1 + m[7];
    t3 = v[12];
    t2 = v[11];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[5];
    v[1] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[12] = t3;
    t2 = t2 + t3;
    v[11] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[6] = t1;

    t0 = v[2];
    t1 = v[7];
    t0 = t0 + t1 + m[15];
    t3 = v[13];
    t2 = v[8];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[14];
    v[2] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[13] = t3;
    t2 = t2 + t3;
    v[8] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[7] = t1;

    t0 = v[3];
    t1 = v[4];
    t0 = t0 + t1 + m[1];
    t3 = v[14];
    t2 = v[9];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[9];
    v[3] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[14] = t3;
    t2 = t2 + t3;
    v[9] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[4] = t1;

/* Round 6 */
    t0 = v[0];
    t1 = v[4];
    t0 = t0 + t1 + m[12];
    t3 = v[12];
    t2 = v[8];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[5];
    v[0] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[12] = t3;
    t2 = t2 + t3;
    v[8] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[4] = t1;

    t0 = v[1];
    t1 = v[5];
    t0 = t0 + t1 + m[1];
    t3 = v[13];
    t2 = v[9];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[15];
    v[1] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[13] = t3;
    t2 = t2 + t3;
    v[9] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[5] = t1;

    t0 = v[2];
    t1 = v[6];
    t0 = t0 + t1 + m[14];
    t3 = v[14];
    t2 = v[10];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[13];
    v[2] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[14] = t3;
    t2 = t2 + t3;
    v[10] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[6] = t1;

    t0 = v[3];
    t1 = v[7];
    t0 = t0 + t1 + m[4];
    t3 = v[15];
    t2 = v[11];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[10];
    v[3] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[15] = t3;
    t2 = t2 + t3;
    v[11] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[7] = t1;

    t0 = v[0];
    t1 = v[5];
    t0 = t0 + t1 + m[0];
    t3 = v[15];
    t2 = v[10];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[7];
    v[0] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[15] = t3;
    t2 = t2 + t3;
    v[10] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[5] = t1;

    t0 = v[1];
    t1 = v[6];
    t0 = t0 + t1 + m[6];
    t3 = v[12];
    t2 = v[11];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[3];
    v[1] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[12] = t3;
    t2 = t2 + t3;
    v[11] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[6] = t1;

    t0 = v[2];
    t1 = v[7];
    t0 = t0 + t1 + m[9];
    t3 = v[13];
    t2 = v[8];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[2];
    v[2] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[13] = t3;
    t2 = t2 + t3;
    v[8] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[7] = t1;

    t0 = v[3];
    t1 = v[4];
    t0 = t0 + t1 + m[8];
    t3 = v[14];
    t2 = v[9];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[11];
    v[3] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[14] = t3;
    t2 = t2 + t3;
    v[9] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[4] = t1;

/* Round 7 */
    t0 = v[0];
    t1 = v[4];
    t0 = t0 + t1 + m[13];
    t3 = v[12];
    t2 = v[8];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[11];
    v[0] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[12] = t3;
    t2 = t2 + t3;
    v[8] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[4] = t1;

    t0 = v[1];
    t1 = v[5];
    t0 = t0 + t1 + m[7];
    t3 = v[13];
    t2 = v[9];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[14];
    v[1] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[13] = t3;
    t2 = t2 + t3;
    v[9] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[5] = t1;

    t0 = v[2];
    t1 = v[6];
    t0 = t0 + t1 + m[12];
    t3 = v[14];
    t2 = v[10];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[1];
    v[2] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[14] = t3;
    t2 = t2 + t3;
    v[10] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[6] = t1;

    t0 = v[3];
    t1 = v[7];
    t0 = t0 + t1 + m[3];
    t3 = v[15];
    t2 = v[11];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[9];
    v[3] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[15] = t3;
    t2 = t2 + t3;
    v[11] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[7] = t1;

    t0 = v[0];
    t1 = v[5];
    t0 = t0 + t1 + m[5];
    t3 = v[15];
    t2 = v[10];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[0];
    v[0] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[15] = t3;
    t2 = t2 + t3;
    v[10] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[5] = t1;

    t0 = v[1];
    t1 = v[6];
    t0 = t0 + t1 + m[15];
    t3 = v[12];
    t2 = v[11];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[4];
    v[1] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[12] = t3;
    t2 = t2 + t3;
    v[11] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[6] = t1;

    t0 = v[2];
    t1 = v[7];
    t0 = t0 + t1 + m[8];
    t3 = v[13];
    t2 = v[8];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[6];
    v[2] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[13] = t3;
    t2 = t2 + t3;
    v[8] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[7] = t1;

    t0 = v[3];
    t1 = v[4];
    t0 = t0 + t1 + m[2];
    t3 = v[14];
    t2 = v[9];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[10];
    v[3] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[14] = t3;
    t2 = t2 + t3;
    v[9] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[4] = t1;

/* Round 8 */
    t0 = v[0];
    t1 = v[4];
    t0 = t0 + t1 + m[6];
    t3 = v[12];
    t2 = v[8];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[15];
    v[0] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[12] = t3;
    t2 = t2 + t3;
    v[8] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[4] = t1;

    t0 = v[1];
    t1 = v[5];
    t0 = t0 + t1 + m[14];
    t3 = v[13];
    t2 = v[9];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[9];
    v[1] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[13] = t3;
    t2 = t2 + t3;
    v[9] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[5] = t1;

    t0 = v[2];
    t1 = v[6];
    t0 = t0 + t1 + m[11];
    t3 = v[14];
    t2 = v[10];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[3];
    v[2] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[14] = t3;
    t2 = t2 + t3;
    v[10] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[6] = t1;

    t0 = v[3];
    t1 = v[7];
    t0 = t0 + t1 + m[0];
    t3 = v[15];
    t2 = v[11];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[8];
    v[3] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[15] = t3;
    t2 = t2 + t3;
    v[11] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[7] = t1;

    t0 = v[0];
    t1 = v[5];
    t0 = t0 + t1 + m[12];
    t3 = v[15];
    t2 = v[10];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[2];
    v[0] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[15] = t3;
    t2 = t2 + t3;
    v[10] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[5] = t1;

    t0 = v[1];
    t1 = v[6];
    t0 = t0 + t1 + m[13];
    t3 = v[12];
    t2 = v[11];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[7];
    v[1] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[12] = t3;
    t2 = t2 + t3;
    v[11] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[6] = t1;

    t0 = v[2];
    t1 = v[7];
    t0 = t0 + t1 + m[1];
    t3 = v[13];
    t2 = v[8];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[4];
    v[2] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[13] = t3;
    t2 = t2 + t3;
    v[8] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[7] = t1;

    t0 = v[3];
    t1 = v[4];
    t0 = t0 + t1 + m[10];
    t3 = v[14];
    t2 = v[9];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[5];
    v[3] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[14] = t3;
    t2 = t2 + t3;
    v[9] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[4] = t1;

/* Round 9 */
    t0 = v[0];
    t1 = v[4];
    t0 = t0 + t1 + m[10];
    t3 = v[12];
    t2 = v[8];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[2];
    v[0] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[12] = t3;
    t2 = t2 + t3;
    v[8] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[4] = t1;

    t0 = v[1];
    t1 = v[5];
    t0 = t0 + t1 + m[8];
    t3 = v[13];
    t2 = v[9];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[4];
    v[1] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[13] = t3;
    t2 = t2 + t3;
    v[9] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[5] = t1;

    t0 = v[2];
    t1 = v[6];
    t0 = t0 + t1 + m[7];
    t3 = v[14];
    t2 = v[10];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[6];
    v[2] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[14] = t3;
    t2 = t2 + t3;
    v[10] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[6] = t1;

    t0 = v[3];
    t1 = v[7];
    t0 = t0 + t1 + m[1];
    t3 = v[15];
    t2 = v[11];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[5];
    v[3] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[15] = t3;
    t2 = t2 + t3;
    v[11] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[7] = t1;

    t0 = v[0];
    t1 = v[5];
    t0 = t0 + t1 + m[15];
    t3 = v[15];
    t2 = v[10];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[11];
    v[0] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[15] = t3;
    t2 = t2 + t3;
    v[10] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[5] = t1;

    t0 = v[1];
    t1 = v[6];
    t0 = t0 + t1 + m[9];
    t3 = v[12];
    t2 = v[11];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[14];
    v[1] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[12] = t3;
    t2 = t2 + t3;
    v[11] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[6] = t1;

    t0 = v[2];
    t1 = v[7];
    t0 = t0 + t1 + m[3];
    t3 = v[13];
    t2 = v[8];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[12];
    v[2] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[13] = t3;
    t2 = t2 + t3;
    v[8] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[7] = t1;

    t0 = v[3];
    t1 = v[4];
    t0 = t0 + t1 + m[13];
    t3 = v[14];
    t2 = v[9];
    t3 = ROTR32(t3 ^ t0, 16);
    t2 = t2 + t3;
    t1 = ROTR32(t1 ^ t2, 12);
    t0 = t0 + t1 + m[0];
    v[3] = t0;
    t3 = ROTR32(t3 ^ t0, 8);
    v[14] = t3;
    t2 = t2 + t3;
    v[9] = t2;
    t1 = ROTR32(t1 ^ t2, 7);
    v[4] = t1;

    S->h[0] ^= v[0] ^ v[8];
    S->h[1] ^= v[1] ^ v[9];
    S->h[2] ^= v[2] ^ v[10];
    S->h[3] ^= v[3] ^ v[11];
    S->h[4] ^= v[4] ^ v[12];
    S->h[5] ^= v[5] ^ v[13];
    S->h[6] ^= v[6] ^ v[14];
    S->h[7] ^= v[7] ^ v[15];
}