/* init xors IV with input parameter block */
static int blake2b_init_param(hash_state *md, const unsigned char *P)
{
   unsigned long i;

   blake2b_init0(md);

   /* IV XOR ParamBlock */
   for (i = 0; i < 8; ++i) {
      ulong64 tmp;
      LOAD64L(tmp, P + i * 8);
      md->blake2b.h[i] ^= tmp;
   }

   md->blake2b.outlen = P[O_DIGEST_LENGTH];
   return CRYPT_OK;
}
static int blake2b_compress(hash_state *md, const unsigned char *buf)
#endif
{
   ulong64 m[16];
   ulong64 v[16];
   unsigned long i;

   for (i = 0; i < 16; ++i) {
      LOAD64L(m[i], buf + i * sizeof(m[i]));
   }

   for (i = 0; i < 8; ++i) {
      v[i] = md->blake2b.h[i];
   }

   v[8] = blake2b_IV[0];
   v[9] = blake2b_IV[1];
   v[10] = blake2b_IV[2];
   v[11] = blake2b_IV[3];
   v[12] = blake2b_IV[4] ^ md->blake2b.t[0];
   v[13] = blake2b_IV[5] ^ md->blake2b.t[1];
   v[14] = blake2b_IV[6] ^ md->blake2b.f[0];
   v[15] = blake2b_IV[7] ^ md->blake2b.f[1];

   ROUND(0);
   ROUND(1);
   ROUND(2);
   ROUND(3);
   ROUND(4);
   ROUND(5);
   ROUND(6);
   ROUND(7);
   ROUND(8);
   ROUND(9);
   ROUND(10);
   ROUND(11);

   for (i = 0; i < 8; ++i) {
      md->blake2b.h[i] = md->blake2b.h[i] ^ v[i] ^ v[i + 8];
   }
   return CRYPT_OK;
}
/* Test store/load macros with offsets */
int store_test(void)
{
  unsigned char buf[256];
  int y;
  ulong32 L, L1;
  ulong64 LL, LL1;
#ifdef LTC_FAST
  int x, z;
#endif

  for (y = 0; y < 4; y++) {
      L  = 0x12345678UL;
      L1 = 0;
      STORE32L(L, buf + y);
      LOAD32L(L1, buf + y);
      if (L1 != L) {
         fprintf(stderr, "\n32L failed at offset %d\n", y);
         return 1;
      }
      STORE32H(L, buf + y);
      LOAD32H(L1, buf + y);
      if (L1 != L) {
         fprintf(stderr, "\n32H failed at offset %d\n", y);
         return 1;
      }
  }

  for (y = 0; y < 8; y++) {
      LL = CONST64 (0x01020304050607);
      LL1 = 0;
      STORE64L(LL, buf + y);
      LOAD64L(LL1, buf + y);
      if (LL1 != LL) {
         fprintf(stderr, "\n64L failed at offset %d\n", y);
         return 1;
      }
      STORE64H(LL, buf + y);
      LOAD64H(LL1, buf + y);
      if (LL1 != LL) {
         fprintf(stderr, "\n64H failed at offset %d\n", y);
         return 1;
      }
  }

/* test LTC_FAST */
#ifdef LTC_FAST
  y = 16;

  for (z = 0; z < y; z++) {
     /* fill y bytes with random */
     yarrow_read(buf+z,   y, &yarrow_prng);
     yarrow_read(buf+z+y, y, &yarrow_prng);

     /* now XOR it byte for byte */
     for (x = 0; x < y; x++) {
         buf[2*y+z+x] = buf[z+x] ^ buf[z+y+x];
     }

     /* now XOR it word for word */
     for (x = 0; x < y; x += sizeof(LTC_FAST_TYPE)) {
        *((LTC_FAST_TYPE*)(&buf[3*y+z+x])) = *((LTC_FAST_TYPE*)(&buf[z+x])) ^ *((LTC_FAST_TYPE*)(&buf[z+y+x]));
     }

     if (memcmp(&buf[2*y+z], &buf[3*y+z], y)) {
        fprintf(stderr, "\nLTC_FAST failed at offset %d\n", z);
        return 1;
     }
  }
#endif
  return 0;
}