Пример #1
0
void bench_filo()
{
    All_Memo_counter MC_INIT;
    stow_memory_counter(MC_INIT);
    INT nb = 200;

    {
         ElFilo<III> Fi(4);
		 INT i;

         for ( i= 0; i<nb; i++)
             Fi.pushlast(III(i));

         for ( i= 0; i<nb; i++)
             Fi[i].i() *= 2;

         for ( i= 0; i<nb; i++)
             BENCH_ASSERT(Fi[i].i() == 2*i);

         for ( i= 2*(nb-1) ; i>= 2; i-= 2)
             BENCH_ASSERT(Fi.poplast().i() == i);

         BENCH_ASSERT(Fi.nb() == 1);
    }
    verif_memory_state(MC_INIT);
}
Пример #2
0
static int  rmd128_compress(hash_state *md, unsigned char *buf)
#endif
{
   ulong32 aa,bb,cc,dd,aaa,bbb,ccc,ddd,X[16];
   int i;
   
   /* load words X */
   for (i = 0; i < 16; i++){
      LOAD32L(X[i], buf + (4 * i));
   }

   /* load state */
   aa = aaa = md->rmd128.state[0];
   bb = bbb = md->rmd128.state[1];
   cc = ccc = md->rmd128.state[2];
   dd = ddd = md->rmd128.state[3];

   /* round 1 */
   FF(aa, bb, cc, dd, X[ 0], 11);
   FF(dd, aa, bb, cc, X[ 1], 14);
   FF(cc, dd, aa, bb, X[ 2], 15);
   FF(bb, cc, dd, aa, X[ 3], 12);
   FF(aa, bb, cc, dd, X[ 4],  5);
   FF(dd, aa, bb, cc, X[ 5],  8);
   FF(cc, dd, aa, bb, X[ 6],  7);
   FF(bb, cc, dd, aa, X[ 7],  9);
   FF(aa, bb, cc, dd, X[ 8], 11);
   FF(dd, aa, bb, cc, X[ 9], 13);
   FF(cc, dd, aa, bb, X[10], 14);
   FF(bb, cc, dd, aa, X[11], 15);
   FF(aa, bb, cc, dd, X[12],  6);
   FF(dd, aa, bb, cc, X[13],  7);
   FF(cc, dd, aa, bb, X[14],  9);
   FF(bb, cc, dd, aa, X[15],  8);
                             
   /* round 2 */
   GG(aa, bb, cc, dd, X[ 7],  7);
   GG(dd, aa, bb, cc, X[ 4],  6);
   GG(cc, dd, aa, bb, X[13],  8);
   GG(bb, cc, dd, aa, X[ 1], 13);
   GG(aa, bb, cc, dd, X[10], 11);
   GG(dd, aa, bb, cc, X[ 6],  9);
   GG(cc, dd, aa, bb, X[15],  7);
   GG(bb, cc, dd, aa, X[ 3], 15);
   GG(aa, bb, cc, dd, X[12],  7);
   GG(dd, aa, bb, cc, X[ 0], 12);
   GG(cc, dd, aa, bb, X[ 9], 15);
   GG(bb, cc, dd, aa, X[ 5],  9);
   GG(aa, bb, cc, dd, X[ 2], 11);
   GG(dd, aa, bb, cc, X[14],  7);
   GG(cc, dd, aa, bb, X[11], 13);
   GG(bb, cc, dd, aa, X[ 8], 12);

   /* round 3 */
   HH(aa, bb, cc, dd, X[ 3], 11);
   HH(dd, aa, bb, cc, X[10], 13);
   HH(cc, dd, aa, bb, X[14],  6);
   HH(bb, cc, dd, aa, X[ 4],  7);
   HH(aa, bb, cc, dd, X[ 9], 14);
   HH(dd, aa, bb, cc, X[15],  9);
   HH(cc, dd, aa, bb, X[ 8], 13);
   HH(bb, cc, dd, aa, X[ 1], 15);
   HH(aa, bb, cc, dd, X[ 2], 14);
   HH(dd, aa, bb, cc, X[ 7],  8);
   HH(cc, dd, aa, bb, X[ 0], 13);
   HH(bb, cc, dd, aa, X[ 6],  6);
   HH(aa, bb, cc, dd, X[13],  5);
   HH(dd, aa, bb, cc, X[11], 12);
   HH(cc, dd, aa, bb, X[ 5],  7);
   HH(bb, cc, dd, aa, X[12],  5);

   /* round 4 */
   II(aa, bb, cc, dd, X[ 1], 11);
   II(dd, aa, bb, cc, X[ 9], 12);
   II(cc, dd, aa, bb, X[11], 14);
   II(bb, cc, dd, aa, X[10], 15);
   II(aa, bb, cc, dd, X[ 0], 14);
   II(dd, aa, bb, cc, X[ 8], 15);
   II(cc, dd, aa, bb, X[12],  9);
   II(bb, cc, dd, aa, X[ 4],  8);
   II(aa, bb, cc, dd, X[13],  9);
   II(dd, aa, bb, cc, X[ 3], 14);
   II(cc, dd, aa, bb, X[ 7],  5);
   II(bb, cc, dd, aa, X[15],  6);
   II(aa, bb, cc, dd, X[14],  8);
   II(dd, aa, bb, cc, X[ 5],  6);
   II(cc, dd, aa, bb, X[ 6],  5);
   II(bb, cc, dd, aa, X[ 2], 12);

   /* parallel round 1 */
   III(aaa, bbb, ccc, ddd, X[ 5],  8); 
   III(ddd, aaa, bbb, ccc, X[14],  9);
   III(ccc, ddd, aaa, bbb, X[ 7],  9);
   III(bbb, ccc, ddd, aaa, X[ 0], 11);
   III(aaa, bbb, ccc, ddd, X[ 9], 13);
   III(ddd, aaa, bbb, ccc, X[ 2], 15);
   III(ccc, ddd, aaa, bbb, X[11], 15);
   III(bbb, ccc, ddd, aaa, X[ 4],  5);
   III(aaa, bbb, ccc, ddd, X[13],  7);
   III(ddd, aaa, bbb, ccc, X[ 6],  7);
   III(ccc, ddd, aaa, bbb, X[15],  8);
   III(bbb, ccc, ddd, aaa, X[ 8], 11);
   III(aaa, bbb, ccc, ddd, X[ 1], 14);
   III(ddd, aaa, bbb, ccc, X[10], 14);
   III(ccc, ddd, aaa, bbb, X[ 3], 12);
   III(bbb, ccc, ddd, aaa, X[12],  6);

   /* parallel round 2 */
   HHH(aaa, bbb, ccc, ddd, X[ 6],  9);
   HHH(ddd, aaa, bbb, ccc, X[11], 13);
   HHH(ccc, ddd, aaa, bbb, X[ 3], 15);
   HHH(bbb, ccc, ddd, aaa, X[ 7],  7);
   HHH(aaa, bbb, ccc, ddd, X[ 0], 12);
   HHH(ddd, aaa, bbb, ccc, X[13],  8);
   HHH(ccc, ddd, aaa, bbb, X[ 5],  9);
   HHH(bbb, ccc, ddd, aaa, X[10], 11);
   HHH(aaa, bbb, ccc, ddd, X[14],  7);
   HHH(ddd, aaa, bbb, ccc, X[15],  7);
   HHH(ccc, ddd, aaa, bbb, X[ 8], 12);
   HHH(bbb, ccc, ddd, aaa, X[12],  7);
   HHH(aaa, bbb, ccc, ddd, X[ 4],  6);
   HHH(ddd, aaa, bbb, ccc, X[ 9], 15);
   HHH(ccc, ddd, aaa, bbb, X[ 1], 13);
   HHH(bbb, ccc, ddd, aaa, X[ 2], 11);

   /* parallel round 3 */   
   GGG(aaa, bbb, ccc, ddd, X[15],  9);
   GGG(ddd, aaa, bbb, ccc, X[ 5],  7);
   GGG(ccc, ddd, aaa, bbb, X[ 1], 15);
   GGG(bbb, ccc, ddd, aaa, X[ 3], 11);
   GGG(aaa, bbb, ccc, ddd, X[ 7],  8);
   GGG(ddd, aaa, bbb, ccc, X[14],  6);
   GGG(ccc, ddd, aaa, bbb, X[ 6],  6);
   GGG(bbb, ccc, ddd, aaa, X[ 9], 14);
   GGG(aaa, bbb, ccc, ddd, X[11], 12);
   GGG(ddd, aaa, bbb, ccc, X[ 8], 13);
   GGG(ccc, ddd, aaa, bbb, X[12],  5);
   GGG(bbb, ccc, ddd, aaa, X[ 2], 14);
   GGG(aaa, bbb, ccc, ddd, X[10], 13);
   GGG(ddd, aaa, bbb, ccc, X[ 0], 13);
   GGG(ccc, ddd, aaa, bbb, X[ 4],  7);
   GGG(bbb, ccc, ddd, aaa, X[13],  5);

   /* parallel round 4 */
   FFF(aaa, bbb, ccc, ddd, X[ 8], 15);
   FFF(ddd, aaa, bbb, ccc, X[ 6],  5);
   FFF(ccc, ddd, aaa, bbb, X[ 4],  8);
   FFF(bbb, ccc, ddd, aaa, X[ 1], 11);
   FFF(aaa, bbb, ccc, ddd, X[ 3], 14);
   FFF(ddd, aaa, bbb, ccc, X[11], 14);
   FFF(ccc, ddd, aaa, bbb, X[15],  6);
   FFF(bbb, ccc, ddd, aaa, X[ 0], 14);
   FFF(aaa, bbb, ccc, ddd, X[ 5],  6);
   FFF(ddd, aaa, bbb, ccc, X[12],  9);
   FFF(ccc, ddd, aaa, bbb, X[ 2], 12);
   FFF(bbb, ccc, ddd, aaa, X[13],  9);
   FFF(aaa, bbb, ccc, ddd, X[ 9], 12);
   FFF(ddd, aaa, bbb, ccc, X[ 7],  5);
   FFF(ccc, ddd, aaa, bbb, X[10], 15);
   FFF(bbb, ccc, ddd, aaa, X[14],  8);

   /* combine results */
   ddd += cc + md->rmd128.state[1];               /* final result for MDbuf[0] */
   md->rmd128.state[1] = md->rmd128.state[2] + dd + aaa;
   md->rmd128.state[2] = md->rmd128.state[3] + aa + bbb;
   md->rmd128.state[3] = md->rmd128.state[0] + bb + ccc;
   md->rmd128.state[0] = ddd;

   return CRYPT_OK;
}
Пример #3
0
static int  rmd256_compress(hash_state *md, unsigned char *buf)
#endif
{
   ulong32 aa,bb,cc,dd,aaa,bbb,ccc,ddd,tmp,X[16];
   int i;

   /* load words X */
   for (i = 0; i < 16; i++){
      LOAD32L(X[i], buf + (4 * i));
   }

   /* load state */
   aa = md->rmd256.state[0];
   bb = md->rmd256.state[1];
   cc = md->rmd256.state[2];
   dd = md->rmd256.state[3];
   aaa = md->rmd256.state[4];
   bbb = md->rmd256.state[5];
   ccc = md->rmd256.state[6];
   ddd = md->rmd256.state[7];

   /* round 1 */
   FF(aa, bb, cc, dd, X[ 0], 11);
   FF(dd, aa, bb, cc, X[ 1], 14);
   FF(cc, dd, aa, bb, X[ 2], 15);
   FF(bb, cc, dd, aa, X[ 3], 12);
   FF(aa, bb, cc, dd, X[ 4],  5);
   FF(dd, aa, bb, cc, X[ 5],  8);
   FF(cc, dd, aa, bb, X[ 6],  7);
   FF(bb, cc, dd, aa, X[ 7],  9);
   FF(aa, bb, cc, dd, X[ 8], 11);
   FF(dd, aa, bb, cc, X[ 9], 13);
   FF(cc, dd, aa, bb, X[10], 14);
   FF(bb, cc, dd, aa, X[11], 15);
   FF(aa, bb, cc, dd, X[12],  6);
   FF(dd, aa, bb, cc, X[13],  7);
   FF(cc, dd, aa, bb, X[14],  9);
   FF(bb, cc, dd, aa, X[15],  8);

   /* parallel round 1 */
   III(aaa, bbb, ccc, ddd, X[ 5],  8);
   III(ddd, aaa, bbb, ccc, X[14],  9);
   III(ccc, ddd, aaa, bbb, X[ 7],  9);
   III(bbb, ccc, ddd, aaa, X[ 0], 11);
   III(aaa, bbb, ccc, ddd, X[ 9], 13);
   III(ddd, aaa, bbb, ccc, X[ 2], 15);
   III(ccc, ddd, aaa, bbb, X[11], 15);
   III(bbb, ccc, ddd, aaa, X[ 4],  5);
   III(aaa, bbb, ccc, ddd, X[13],  7);
   III(ddd, aaa, bbb, ccc, X[ 6],  7);
   III(ccc, ddd, aaa, bbb, X[15],  8);
   III(bbb, ccc, ddd, aaa, X[ 8], 11);
   III(aaa, bbb, ccc, ddd, X[ 1], 14);
   III(ddd, aaa, bbb, ccc, X[10], 14);
   III(ccc, ddd, aaa, bbb, X[ 3], 12);
   III(bbb, ccc, ddd, aaa, X[12],  6);

   tmp = aa; aa = aaa; aaa = tmp;

   /* round 2 */
   GG(aa, bb, cc, dd, X[ 7],  7);
   GG(dd, aa, bb, cc, X[ 4],  6);
   GG(cc, dd, aa, bb, X[13],  8);
   GG(bb, cc, dd, aa, X[ 1], 13);
   GG(aa, bb, cc, dd, X[10], 11);
   GG(dd, aa, bb, cc, X[ 6],  9);
   GG(cc, dd, aa, bb, X[15],  7);
   GG(bb, cc, dd, aa, X[ 3], 15);
   GG(aa, bb, cc, dd, X[12],  7);
   GG(dd, aa, bb, cc, X[ 0], 12);
   GG(cc, dd, aa, bb, X[ 9], 15);
   GG(bb, cc, dd, aa, X[ 5],  9);
   GG(aa, bb, cc, dd, X[ 2], 11);
   GG(dd, aa, bb, cc, X[14],  7);
   GG(cc, dd, aa, bb, X[11], 13);
   GG(bb, cc, dd, aa, X[ 8], 12);

   /* parallel round 2 */
   HHH(aaa, bbb, ccc, ddd, X[ 6],  9);
   HHH(ddd, aaa, bbb, ccc, X[11], 13);
   HHH(ccc, ddd, aaa, bbb, X[ 3], 15);
   HHH(bbb, ccc, ddd, aaa, X[ 7],  7);
   HHH(aaa, bbb, ccc, ddd, X[ 0], 12);
   HHH(ddd, aaa, bbb, ccc, X[13],  8);
   HHH(ccc, ddd, aaa, bbb, X[ 5],  9);
   HHH(bbb, ccc, ddd, aaa, X[10], 11);
   HHH(aaa, bbb, ccc, ddd, X[14],  7);
   HHH(ddd, aaa, bbb, ccc, X[15],  7);
   HHH(ccc, ddd, aaa, bbb, X[ 8], 12);
   HHH(bbb, ccc, ddd, aaa, X[12],  7);
   HHH(aaa, bbb, ccc, ddd, X[ 4],  6);
   HHH(ddd, aaa, bbb, ccc, X[ 9], 15);
   HHH(ccc, ddd, aaa, bbb, X[ 1], 13);
   HHH(bbb, ccc, ddd, aaa, X[ 2], 11);

   tmp = bb; bb = bbb; bbb = tmp;

   /* round 3 */
   HH(aa, bb, cc, dd, X[ 3], 11);
   HH(dd, aa, bb, cc, X[10], 13);
   HH(cc, dd, aa, bb, X[14],  6);
   HH(bb, cc, dd, aa, X[ 4],  7);
   HH(aa, bb, cc, dd, X[ 9], 14);
   HH(dd, aa, bb, cc, X[15],  9);
   HH(cc, dd, aa, bb, X[ 8], 13);
   HH(bb, cc, dd, aa, X[ 1], 15);
   HH(aa, bb, cc, dd, X[ 2], 14);
   HH(dd, aa, bb, cc, X[ 7],  8);
   HH(cc, dd, aa, bb, X[ 0], 13);
   HH(bb, cc, dd, aa, X[ 6],  6);
   HH(aa, bb, cc, dd, X[13],  5);
   HH(dd, aa, bb, cc, X[11], 12);
   HH(cc, dd, aa, bb, X[ 5],  7);
   HH(bb, cc, dd, aa, X[12],  5);

   /* parallel round 3 */
   GGG(aaa, bbb, ccc, ddd, X[15],  9);
   GGG(ddd, aaa, bbb, ccc, X[ 5],  7);
   GGG(ccc, ddd, aaa, bbb, X[ 1], 15);
   GGG(bbb, ccc, ddd, aaa, X[ 3], 11);
   GGG(aaa, bbb, ccc, ddd, X[ 7],  8);
   GGG(ddd, aaa, bbb, ccc, X[14],  6);
   GGG(ccc, ddd, aaa, bbb, X[ 6],  6);
   GGG(bbb, ccc, ddd, aaa, X[ 9], 14);
   GGG(aaa, bbb, ccc, ddd, X[11], 12);
   GGG(ddd, aaa, bbb, ccc, X[ 8], 13);
   GGG(ccc, ddd, aaa, bbb, X[12],  5);
   GGG(bbb, ccc, ddd, aaa, X[ 2], 14);
   GGG(aaa, bbb, ccc, ddd, X[10], 13);
   GGG(ddd, aaa, bbb, ccc, X[ 0], 13);
   GGG(ccc, ddd, aaa, bbb, X[ 4],  7);
   GGG(bbb, ccc, ddd, aaa, X[13],  5);

   tmp = cc; cc = ccc; ccc = tmp;

   /* round 4 */
   II(aa, bb, cc, dd, X[ 1], 11);
   II(dd, aa, bb, cc, X[ 9], 12);
   II(cc, dd, aa, bb, X[11], 14);
   II(bb, cc, dd, aa, X[10], 15);
   II(aa, bb, cc, dd, X[ 0], 14);
   II(dd, aa, bb, cc, X[ 8], 15);
   II(cc, dd, aa, bb, X[12],  9);
   II(bb, cc, dd, aa, X[ 4],  8);
   II(aa, bb, cc, dd, X[13],  9);
   II(dd, aa, bb, cc, X[ 3], 14);
   II(cc, dd, aa, bb, X[ 7],  5);
   II(bb, cc, dd, aa, X[15],  6);
   II(aa, bb, cc, dd, X[14],  8);
   II(dd, aa, bb, cc, X[ 5],  6);
   II(cc, dd, aa, bb, X[ 6],  5);
   II(bb, cc, dd, aa, X[ 2], 12);

   /* parallel round 4 */
   FFF(aaa, bbb, ccc, ddd, X[ 8], 15);
   FFF(ddd, aaa, bbb, ccc, X[ 6],  5);
   FFF(ccc, ddd, aaa, bbb, X[ 4],  8);
   FFF(bbb, ccc, ddd, aaa, X[ 1], 11);
   FFF(aaa, bbb, ccc, ddd, X[ 3], 14);
   FFF(ddd, aaa, bbb, ccc, X[11], 14);
   FFF(ccc, ddd, aaa, bbb, X[15],  6);
   FFF(bbb, ccc, ddd, aaa, X[ 0], 14);
   FFF(aaa, bbb, ccc, ddd, X[ 5],  6);
   FFF(ddd, aaa, bbb, ccc, X[12],  9);
   FFF(ccc, ddd, aaa, bbb, X[ 2], 12);
   FFF(bbb, ccc, ddd, aaa, X[13],  9);
   FFF(aaa, bbb, ccc, ddd, X[ 9], 12);
   FFF(ddd, aaa, bbb, ccc, X[ 7],  5);
   FFF(ccc, ddd, aaa, bbb, X[10], 15);
   FFF(bbb, ccc, ddd, aaa, X[14],  8);

   tmp = dd; dd = ddd; ddd = tmp;

   /* combine results */
   md->rmd256.state[0] += aa;
   md->rmd256.state[1] += bb;
   md->rmd256.state[2] += cc;
   md->rmd256.state[3] += dd;
   md->rmd256.state[4] += aaa;
   md->rmd256.state[5] += bbb;
   md->rmd256.state[6] += ccc;
   md->rmd256.state[7] += ddd;

   return CRYPT_OK;
}
Пример #4
0
static void rmd128_compress(ccdigest_state_t state, unsigned long nblocks, const void *in)
{
    uint32_t aa,bb,cc,dd,aaa,bbb,ccc,ddd,X[16];
    int i;
    uint32_t *s = ccdigest_u32(state);
    const unsigned char *buf = in;

    while(nblocks--) {

        /* load words X */
        for (i = 0; i < 16; i++){
            CC_LOAD32_LE(X[i], buf + (4 * i));
        }

        /* load state */
        aa = aaa = s[0];
        bb = bbb = s[1];
        cc = ccc = s[2];
        dd = ddd = s[3];

        /* round 1 */
        FF(aa, bb, cc, dd, X[ 0], 11);
        FF(dd, aa, bb, cc, X[ 1], 14);
        FF(cc, dd, aa, bb, X[ 2], 15);
        FF(bb, cc, dd, aa, X[ 3], 12);
        FF(aa, bb, cc, dd, X[ 4],  5);
        FF(dd, aa, bb, cc, X[ 5],  8);
        FF(cc, dd, aa, bb, X[ 6],  7);
        FF(bb, cc, dd, aa, X[ 7],  9);
        FF(aa, bb, cc, dd, X[ 8], 11);
        FF(dd, aa, bb, cc, X[ 9], 13);
        FF(cc, dd, aa, bb, X[10], 14);
        FF(bb, cc, dd, aa, X[11], 15);
        FF(aa, bb, cc, dd, X[12],  6);
        FF(dd, aa, bb, cc, X[13],  7);
        FF(cc, dd, aa, bb, X[14],  9);
        FF(bb, cc, dd, aa, X[15],  8);

        /* round 2 */
        GG(aa, bb, cc, dd, X[ 7],  7);
        GG(dd, aa, bb, cc, X[ 4],  6);
        GG(cc, dd, aa, bb, X[13],  8);
        GG(bb, cc, dd, aa, X[ 1], 13);
        GG(aa, bb, cc, dd, X[10], 11);
        GG(dd, aa, bb, cc, X[ 6],  9);
        GG(cc, dd, aa, bb, X[15],  7);
        GG(bb, cc, dd, aa, X[ 3], 15);
        GG(aa, bb, cc, dd, X[12],  7);
        GG(dd, aa, bb, cc, X[ 0], 12);
        GG(cc, dd, aa, bb, X[ 9], 15);
        GG(bb, cc, dd, aa, X[ 5],  9);
        GG(aa, bb, cc, dd, X[ 2], 11);
        GG(dd, aa, bb, cc, X[14],  7);
        GG(cc, dd, aa, bb, X[11], 13);
        GG(bb, cc, dd, aa, X[ 8], 12);

        /* round 3 */
        HH(aa, bb, cc, dd, X[ 3], 11);
        HH(dd, aa, bb, cc, X[10], 13);
        HH(cc, dd, aa, bb, X[14],  6);
        HH(bb, cc, dd, aa, X[ 4],  7);
        HH(aa, bb, cc, dd, X[ 9], 14);
        HH(dd, aa, bb, cc, X[15],  9);
        HH(cc, dd, aa, bb, X[ 8], 13);
        HH(bb, cc, dd, aa, X[ 1], 15);
        HH(aa, bb, cc, dd, X[ 2], 14);
        HH(dd, aa, bb, cc, X[ 7],  8);
        HH(cc, dd, aa, bb, X[ 0], 13);
        HH(bb, cc, dd, aa, X[ 6],  6);
        HH(aa, bb, cc, dd, X[13],  5);
        HH(dd, aa, bb, cc, X[11], 12);
        HH(cc, dd, aa, bb, X[ 5],  7);
        HH(bb, cc, dd, aa, X[12],  5);

        /* round 4 */
        II(aa, bb, cc, dd, X[ 1], 11);
        II(dd, aa, bb, cc, X[ 9], 12);
        II(cc, dd, aa, bb, X[11], 14);
        II(bb, cc, dd, aa, X[10], 15);
        II(aa, bb, cc, dd, X[ 0], 14);
        II(dd, aa, bb, cc, X[ 8], 15);
        II(cc, dd, aa, bb, X[12],  9);
        II(bb, cc, dd, aa, X[ 4],  8);
        II(aa, bb, cc, dd, X[13],  9);
        II(dd, aa, bb, cc, X[ 3], 14);
        II(cc, dd, aa, bb, X[ 7],  5);
        II(bb, cc, dd, aa, X[15],  6);
        II(aa, bb, cc, dd, X[14],  8);
        II(dd, aa, bb, cc, X[ 5],  6);
        II(cc, dd, aa, bb, X[ 6],  5);
        II(bb, cc, dd, aa, X[ 2], 12);

        /* parallel round 1 */
        III(aaa, bbb, ccc, ddd, X[ 5],  8);
        III(ddd, aaa, bbb, ccc, X[14],  9);
        III(ccc, ddd, aaa, bbb, X[ 7],  9);
        III(bbb, ccc, ddd, aaa, X[ 0], 11);
        III(aaa, bbb, ccc, ddd, X[ 9], 13);
        III(ddd, aaa, bbb, ccc, X[ 2], 15);
        III(ccc, ddd, aaa, bbb, X[11], 15);
        III(bbb, ccc, ddd, aaa, X[ 4],  5);
        III(aaa, bbb, ccc, ddd, X[13],  7);
        III(ddd, aaa, bbb, ccc, X[ 6],  7);
        III(ccc, ddd, aaa, bbb, X[15],  8);
        III(bbb, ccc, ddd, aaa, X[ 8], 11);
        III(aaa, bbb, ccc, ddd, X[ 1], 14);
        III(ddd, aaa, bbb, ccc, X[10], 14);
        III(ccc, ddd, aaa, bbb, X[ 3], 12);
        III(bbb, ccc, ddd, aaa, X[12],  6);

        /* parallel round 2 */
        HHH(aaa, bbb, ccc, ddd, X[ 6],  9);
        HHH(ddd, aaa, bbb, ccc, X[11], 13);
        HHH(ccc, ddd, aaa, bbb, X[ 3], 15);
        HHH(bbb, ccc, ddd, aaa, X[ 7],  7);
        HHH(aaa, bbb, ccc, ddd, X[ 0], 12);
        HHH(ddd, aaa, bbb, ccc, X[13],  8);
        HHH(ccc, ddd, aaa, bbb, X[ 5],  9);
        HHH(bbb, ccc, ddd, aaa, X[10], 11);
        HHH(aaa, bbb, ccc, ddd, X[14],  7);
        HHH(ddd, aaa, bbb, ccc, X[15],  7);
        HHH(ccc, ddd, aaa, bbb, X[ 8], 12);
        HHH(bbb, ccc, ddd, aaa, X[12],  7);
        HHH(aaa, bbb, ccc, ddd, X[ 4],  6);
        HHH(ddd, aaa, bbb, ccc, X[ 9], 15);
        HHH(ccc, ddd, aaa, bbb, X[ 1], 13);
        HHH(bbb, ccc, ddd, aaa, X[ 2], 11);

        /* parallel round 3 */
        GGG(aaa, bbb, ccc, ddd, X[15],  9);
        GGG(ddd, aaa, bbb, ccc, X[ 5],  7);
        GGG(ccc, ddd, aaa, bbb, X[ 1], 15);
        GGG(bbb, ccc, ddd, aaa, X[ 3], 11);
        GGG(aaa, bbb, ccc, ddd, X[ 7],  8);
        GGG(ddd, aaa, bbb, ccc, X[14],  6);
        GGG(ccc, ddd, aaa, bbb, X[ 6],  6);
        GGG(bbb, ccc, ddd, aaa, X[ 9], 14);
        GGG(aaa, bbb, ccc, ddd, X[11], 12);
        GGG(ddd, aaa, bbb, ccc, X[ 8], 13);
        GGG(ccc, ddd, aaa, bbb, X[12],  5);
        GGG(bbb, ccc, ddd, aaa, X[ 2], 14);
        GGG(aaa, bbb, ccc, ddd, X[10], 13);
        GGG(ddd, aaa, bbb, ccc, X[ 0], 13);
        GGG(ccc, ddd, aaa, bbb, X[ 4],  7);
        GGG(bbb, ccc, ddd, aaa, X[13],  5);

        /* parallel round 4 */
        FFF(aaa, bbb, ccc, ddd, X[ 8], 15);
        FFF(ddd, aaa, bbb, ccc, X[ 6],  5);
        FFF(ccc, ddd, aaa, bbb, X[ 4],  8);
        FFF(bbb, ccc, ddd, aaa, X[ 1], 11);
        FFF(aaa, bbb, ccc, ddd, X[ 3], 14);
        FFF(ddd, aaa, bbb, ccc, X[11], 14);
        FFF(ccc, ddd, aaa, bbb, X[15],  6);
        FFF(bbb, ccc, ddd, aaa, X[ 0], 14);
        FFF(aaa, bbb, ccc, ddd, X[ 5],  6);
        FFF(ddd, aaa, bbb, ccc, X[12],  9);
        FFF(ccc, ddd, aaa, bbb, X[ 2], 12);
        FFF(bbb, ccc, ddd, aaa, X[13],  9);
        FFF(aaa, bbb, ccc, ddd, X[ 9], 12);
        FFF(ddd, aaa, bbb, ccc, X[ 7],  5);
        FFF(ccc, ddd, aaa, bbb, X[10], 15);
        FFF(bbb, ccc, ddd, aaa, X[14],  8);

        /* combine results */
        ddd += cc + s[1];               /* final result for MDbuf[0] */
        s[1] = s[2] + dd + aaa;
        s[2] = s[3] + aa + bbb;
        s[3] = s[0] + bb + ccc;
        s[0] = ddd;

        buf+=CCRMD_BLOCK_SIZE;
    }
}