Example #1
uint64_t siphash24(const void *src, unsigned long src_sz, const char key[16])
{
	const uint64_t *_key = (const uint64_t *)key;	/* assumes the key buffer is 8-byte aligned */
	uint64_t k0 = _le64toh(_key[0]);
	uint64_t k1 = _le64toh(_key[1]);
	uint64_t b = (uint64_t)src_sz << 56;
	const uint64_t *in = (uint64_t *)src;

	uint64_t v0 = k0 ^ 0x736f6d6570736575ULL;
	uint64_t v1 = k1 ^ 0x646f72616e646f6dULL;
	uint64_t v2 = k0 ^ 0x6c7967656e657261ULL;
	uint64_t v3 = k1 ^ 0x7465646279746573ULL;

	while (src_sz >= 8) {
		uint64_t mi = _le64toh(RD64p(in));
		in += 1;
		src_sz -= 8;
		v3 ^= mi;
		DOUBLE_ROUND(v0, v1, v2, v3);
		v0 ^= mi;
	}

	uint64_t t = 0;
	uint8_t *pt = (uint8_t *)&t;
	uint8_t *m = (uint8_t *)in;
	/* fold the 0..7 trailing input bytes into t; cases fall through intentionally */
	switch (src_sz) {
	case 7:
		pt[6] = m[6]; /* fall through */
	case 6:
		pt[5] = m[5]; /* fall through */
	case 5:
		pt[4] = m[4]; /* fall through */
	case 4:
		*((uint32_t *)&pt[0]) = RD32p(&m[0]);
		break;
	case 3:
		pt[2] = m[2]; /* fall through */
	case 2:
		pt[1] = m[1]; /* fall through */
	case 1:
		pt[0] = m[0];
	}
	b |= _le64toh(t);

	v3 ^= b;
	DOUBLE_ROUND(v0, v1, v2, v3);
	v0 ^= b;
	v2 ^= 0xff;
	DOUBLE_ROUND(v0, v1, v2, v3);
	DOUBLE_ROUND(v0, v1, v2, v3);
	return (v0 ^ v1) ^ (v2 ^ v3);
}
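
All of the SipHash examples on this page rely on a DOUBLE_ROUND macro that the excerpts do not show. For reference, here is a sketch in the style of the csiphash implementation these functions appear to derive from; treat the exact macro text as an assumption rather than verbatim project source. (RD64p and RD32p above are likewise unshown helpers, presumably little-endian 64- and 32-bit loads from a possibly unaligned pointer.)

#define ROTATE(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))

/* half of a SipHash round pair; the (13,16)/(17,21) rotation pairs and the
   fixed rotate of 'a' by 32 follow the SipHash specification */
#define HALF_ROUND(a, b, c, d, s, t)	\
	a += b; c += d;			\
	b = ROTATE(b, s) ^ a;		\
	d = ROTATE(d, t) ^ c;		\
	a = ROTATE(a, 32);

/* one DOUBLE_ROUND = two full SipHash rounds, giving SipHash-2-4 with the
   call pattern used above (one per message word, two more at finalization) */
#define DOUBLE_ROUND(v0, v1, v2, v3)		\
	HALF_ROUND(v0, v1, v2, v3, 13, 16);	\
	HALF_ROUND(v2, v1, v0, v3, 17, 21);	\
	HALF_ROUND(v0, v1, v2, v3, 13, 16);	\
	HALF_ROUND(v2, v1, v0, v3, 17, 21);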
Example #2
File: csiphash.c Project: BwRy/Astoria
uint64_t siphash24(const void *src, unsigned long src_sz, const struct sipkey *key) {
	uint64_t k0 = key->k0;
	uint64_t k1 = key->k1;
	uint64_t b = (uint64_t)src_sz << 56;
	const uint64_t *in = (uint64_t*)src;

	uint64_t t;
	uint8_t *pt, *m;

	uint64_t v0 = k0 ^ 0x736f6d6570736575ULL;
	uint64_t v1 = k1 ^ 0x646f72616e646f6dULL;
	uint64_t v2 = k0 ^ 0x6c7967656e657261ULL;
	uint64_t v3 = k1 ^ 0x7465646279746573ULL;

	while (src_sz >= 8) {
#ifdef UNALIGNED_OK
		uint64_t mi = _le64toh(*in);
#else
		uint64_t mi;
		memcpy(&mi, in, 8);
		mi = _le64toh(mi);
#endif
		in += 1; src_sz -= 8;
		v3 ^= mi;
		DOUBLE_ROUND(v0,v1,v2,v3);
		v0 ^= mi;
	}

	t = 0; pt = (uint8_t*)&t; m = (uint8_t*)in;
	switch (src_sz) {
	case 7: pt[6] = m[6];
	case 6: pt[5] = m[5];
	case 5: pt[4] = m[4];
#ifdef UNALIGNED_OK
	case 4: *((uint32_t*)&pt[0]) = *((uint32_t*)&m[0]); break;
#else
	case 4: pt[3] = m[3];
#endif
	case 3: pt[2] = m[2];
	case 2: pt[1] = m[1];
	case 1: pt[0] = m[0];
	}
	b |= _le64toh(t);

	v3 ^= b;
	DOUBLE_ROUND(v0,v1,v2,v3);
	v0 ^= b; v2 ^= 0xff;
	DOUBLE_ROUND(v0,v1,v2,v3);
	DOUBLE_ROUND(v0,v1,v2,v3);
	return (v0 ^ v1) ^ (v2 ^ v3);
}
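
A minimal sketch of how this variant might be driven. The struct sipkey layout and the key-loading convention below are assumptions inferred from the k0/k1 accesses above, not the project's actual header:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* layout inferred from the k0/k1 accesses above -- an assumption */
struct sipkey { uint64_t k0, k1; };
uint64_t siphash24(const void *src, unsigned long src_sz, const struct sipkey *key);

int main(void) {
	struct sipkey key;
	const unsigned char raw[16] = "0123456789abcdef";	/* 16-byte secret key */

	/* assumed convention: k0/k1 hold the two key halves in host order;
	   a production loader would read them as little-endian words */
	memcpy(&key.k0, raw, 8);
	memcpy(&key.k1, raw + 8, 8);

	const char msg[] = "hello world";
	uint64_t h = siphash24(msg, strlen(msg), &key);
	printf("siphash24 = %016llx\n", (unsigned long long)h);
	return 0;
}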
Example #3
void Camellia::Base::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
{
#define KS(i, j) ks[i*4 + EFI(j/2)*2 + EFI(j%2)]

// combined FL (applied to lh:ll) and FL^-1 (applied to rh:rl) of the Camellia spec
#define FL(klh, kll, krh, krl)		\
	ll ^= rotlFixed(lh & klh, 1);	\
	lh ^= (ll | kll);				\
	rh ^= (rl | krl);				\
	rl ^= rotlFixed(rh & krh, 1);

	word32 lh, ll, rh, rl;
	typedef BlockGetAndPut<word32, BigEndian> Block;
	Block::Get(inBlock)(lh)(ll)(rh)(rl);
	const word32 *ks = m_key.data();
	lh ^= KS(0,0);
	ll ^= KS(0,1);
	rh ^= KS(0,2);
	rl ^= KS(0,3);

	// timing attack countermeasure. see comments at top for more details.
	// Touching one word per cache line pulls the whole s1 S-box into cache
	// before any key-dependent lookups; u stays zero (0 & x == 0), so the
	// ORs into lh/ll below are no-ops.
	const int cacheLineSize = GetCacheLineSize();
	unsigned int i;
	volatile word32 _u = 0;
	word32 u = _u;

	assert(IsAlignedOn(s1,GetAlignmentOf<word32>()));
	for (i=0; i<256; i+=cacheLineSize)
		u &= *(const word32 *)(void*)(s1+i);
	u &= *(const word32 *)(void*)(s1+252);
	lh |= u; ll |= u;

	SLOW_ROUND(lh, ll, rh, rl, KS(1,0), KS(1,1))
	SLOW_ROUND(rh, rl, lh, ll, KS(1,2), KS(1,3))
	for (i = m_rounds-1; i > 0; --i)
	{
		DOUBLE_ROUND(lh, ll, rh, rl, KS(2,0), KS(2,1), KS(2,2), KS(2,3))
		DOUBLE_ROUND(lh, ll, rh, rl, KS(3,0), KS(3,1), KS(3,2), KS(3,3))
		FL(KS(4,0), KS(4,1), KS(4,2), KS(4,3));
		DOUBLE_ROUND(lh, ll, rh, rl, KS(5,0), KS(5,1), KS(5,2), KS(5,3))
		ks += 16;
	}
	DOUBLE_ROUND(lh, ll, rh, rl, KS(2,0), KS(2,1), KS(2,2), KS(2,3))
	ROUND(lh, ll, rh, rl, KS(3,0), KS(3,1))
	SLOW_ROUND(rh, rl, lh, ll, KS(3,2), KS(3,3))
	lh ^= KS(4,0);
	ll ^= KS(4,1);
	rh ^= KS(4,2);
	rl ^= KS(4,3);
	Block::Put(xorBlock, outBlock)(rh)(rl)(lh)(ll);
}
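
The round macros above lean on rotlFixed, Crypto++'s rotate-left by a compile-time constant. A minimal C equivalent for the 32-bit case (uint32_t standing in for word32):

#include <stdint.h>

/* rotate left by a fixed distance, 0 < n < 32 */
static uint32_t rotl_fixed(uint32_t x, unsigned int n)
{
	return (x << n) | (x >> (32 - n));
}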
Example #4
void Camellia::Base::UncheckedSetKey(const byte *key, unsigned int keylen, const NameValuePairs &)
{
	m_rounds = (keylen >= 24) ? 4 : 3;
	unsigned int kslen = (8 * m_rounds + 2);
	m_key.New(kslen*2);
	word32 *ks32 = m_key.data();
	int m=0, a=0;
	if (!IsForwardTransformation())
		m = -1, a = kslen-1;

	word32 kl0, kl1, kl2, kl3;
	GetBlock<word32, BigEndian> getBlock(key);
	getBlock(kl0)(kl1)(kl2)(kl3);
	word32 k0=kl0, k1=kl1, k2=kl2, k3=kl3;

#define CALC_ADDR2(base, i, j)	((byte *)(base)+8*(i)+4*(j)+((-16*(i))&m))
#define CALC_ADDR(base, i)	CALC_ADDR2(base, i, 0)

#if 1
	word64 kwl, kwr;
	ks32 += 2*a;
#define PREPARE_KS_ROUNDS			\
	kwl = (word64(k0) << 32) | k1;	\
	kwr = (word64(k2) << 32) | k3
#define KS_ROUND_0(i)							\
	assert(IsAlignedOn(CALC_ADDR(ks32, i+EFI(0)),GetAlignmentOf<word64>()));	\
	assert(IsAlignedOn(CALC_ADDR(ks32, i+EFI(1)),GetAlignmentOf<word64>()));	\
	*(word64*)(void*)CALC_ADDR(ks32, i+EFI(0)) = kwl;	\
	*(word64*)(void*)CALC_ADDR(ks32, i+EFI(1)) = kwr
#define KS_ROUND(i, r, which)																						\
	assert(IsAlignedOn(CALC_ADDR(ks32, i+EFI(r<64)),GetAlignmentOf<word64>()));	\
	assert(IsAlignedOn(CALC_ADDR(ks32, i+EFI(r>64)),GetAlignmentOf<word64>()));	\
	if (which & (1<<int(r<64))) *(word64*)(void*)CALC_ADDR(ks32, i+EFI(r<64)) = (kwr << (r%64)) | (kwl >> (64 - (r%64)));	\
	if (which & (1<<int(r>64))) *(word64*)(void*)CALC_ADDR(ks32, i+EFI(r>64)) = (kwl << (r%64)) | (kwr >> (64 - (r%64)))
#else
	// SSE2 version is 30% faster on Intel Core 2. Doesn't seem worth the hassle of maintenance, but left here
	// #if'd out in case someone needs it.
	__m128i kw, kw2;
	__m128i *ks128 = (__m128i *)ks32+a/2;
	ks32 += 2*a;
#define PREPARE_KS_ROUNDS													\
	kw = _mm_set_epi32(k0, k1, k2, k3);										\
	if (m) kw2 = kw, kw = _mm_shuffle_epi32(kw, _MM_SHUFFLE(1, 0, 3, 2));	\
	else kw2 = _mm_shuffle_epi32(kw, _MM_SHUFFLE(1, 0, 3, 2))
#define KS_ROUND_0(i)										\
	_mm_store_si128((__m128i *)CALC_ADDR(ks128, i), kw)
#define KS_ROUND(i, r, which)	{																				\
	__m128i temp;																								\
	if (r<64 && (which!=1 || m)) temp = _mm_or_si128(_mm_slli_epi64(kw, r%64), _mm_srli_epi64(kw2, 64-r%64));	\
	else temp = _mm_or_si128(_mm_slli_epi64(kw2, r%64), _mm_srli_epi64(kw, 64-r%64));							\
	if (which & 2) _mm_store_si128((__m128i *)CALC_ADDR(ks128, i), temp);										\
	else _mm_storel_epi64((__m128i*)CALC_ADDR(ks32, i+EFI(0)), temp);											\
	}
#endif

	if (keylen == 16)
	{
		// KL
		PREPARE_KS_ROUNDS;
		KS_ROUND_0(0);
		KS_ROUND(4, 15, 3);
		KS_ROUND(10, 45, 3);
		KS_ROUND(12, 60, 2);
		KS_ROUND(16, 77, 3);
		KS_ROUND(18, 94, 3);
		KS_ROUND(22, 111, 3);

		// KA
		k0=kl0, k1=kl1, k2=kl2, k3=kl3;
		DOUBLE_ROUND(k0, k1, k2, k3, 0xA09E667Ful, 0x3BCC908Bul, 0xB67AE858ul, 0x4CAA73B2ul);
		k0^=kl0, k1^=kl1, k2^=kl2, k3^=kl3;
		DOUBLE_ROUND(k0, k1, k2, k3, 0xC6EF372Ful, 0xE94F82BEul, 0x54FF53A5ul, 0xF1D36F1Cul);

		PREPARE_KS_ROUNDS;
		KS_ROUND_0(2);
		KS_ROUND(6, 15, 3);
		KS_ROUND(8, 30, 3);
		KS_ROUND(12, 45, 1);
		KS_ROUND(14, 60, 3);
		KS_ROUND(20, 94, 3);
		KS_ROUND(24, 47, 3);
	}
	else
	{
		// KL
		PREPARE_KS_ROUNDS;
		KS_ROUND_0(0);
		KS_ROUND(12, 45, 3);
		KS_ROUND(16, 60, 3);
		KS_ROUND(22, 77, 3);
		KS_ROUND(30, 111, 3);

		// KR
		word32 kr0, kr1, kr2, kr3;
		GetBlock<word32, BigEndian>(key+16)(kr0)(kr1);
		if (keylen == 24)
			kr2 = ~kr0, kr3 = ~kr1;
		else
			GetBlock<word32, BigEndian>(key+24)(kr2)(kr3);
		k0=kr0, k1=kr1, k2=kr2, k3=kr3;

		PREPARE_KS_ROUNDS;
		KS_ROUND(4, 15, 3);
		KS_ROUND(8, 30, 3);
		KS_ROUND(18, 60, 3);
		KS_ROUND(26, 94, 3);

		// KA
		k0^=kl0, k1^=kl1, k2^=kl2, k3^=kl3;
		DOUBLE_ROUND(k0, k1, k2, k3, 0xA09E667Ful, 0x3BCC908Bul, 0xB67AE858ul, 0x4CAA73B2ul);
		k0^=kl0, k1^=kl1, k2^=kl2, k3^=kl3;
		DOUBLE_ROUND(k0, k1, k2, k3, 0xC6EF372Ful, 0xE94F82BEul, 0x54FF53A5ul, 0xF1D36F1Cul);

		PREPARE_KS_ROUNDS;
		KS_ROUND(6, 15, 3);
		KS_ROUND(14, 45, 3);
		KS_ROUND(24, 77, 3);
		KS_ROUND(28, 94, 3);

		// KB
		k0^=kr0, k1^=kr1, k2^=kr2, k3^=kr3;
		DOUBLE_ROUND(k0, k1, k2, k3, 0x10E527FAul, 0xDE682D1Dul, 0xB05688C2ul, 0xB3E6C1FDul);

		PREPARE_KS_ROUNDS;
		KS_ROUND_0(2);
		KS_ROUND(10, 30, 3);
		KS_ROUND(20, 60, 3);
		KS_ROUND(32, 47, 3);
	}
}
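
Each KS_ROUND above stores one or both 64-bit halves of the 128-bit value (kwl:kwr) rotated left by r bits, which is how Camellia's key schedule derives subkeys. A standalone sketch of the same arithmetic (hypothetical helper, not part of Crypto++) for 0 < r < 64; for r > 64 the macro swaps the roles of kwl and kwr and rotates by r % 64:

#include <stdint.h>

/* halves of the 128-bit rotation (kwl:kwr) <<< r, for 0 < r < 64 */
static void rotl128(uint64_t kwl, uint64_t kwr, unsigned int r,
                    uint64_t *hi, uint64_t *lo)
{
	*hi = (kwl << r) | (kwr >> (64 - r));
	*lo = (kwr << r) | (kwl >> (64 - r));
}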
Example #5
uint64_t siphash24(const void *src, unsigned long src_sz, const struct sipkey *key) {
	uint64_t k0 = key->k0;
	uint64_t k1 = key->k1;
	uint64_t b = (uint64_t)src_sz << 56;
#ifdef UNALIGNED_OK
	const uint64_t *in = (uint64_t*)src;
#else
	/* On platforms where alignment matters, declaring 'in' as a pointer
	 * to a type with alignment requirements would let the compiler emit
	 * code that assumes the pointer really is so aligned, so keep it as
	 * a byte pointer and do the 64-bit loads with memcpy.
	 */
	const uint8_t *in = (uint8_t *)src;
#endif

	uint64_t t;
	uint8_t *pt, *m;

	uint64_t v0 = k0 ^ 0x736f6d6570736575ULL;
	uint64_t v1 = k1 ^ 0x646f72616e646f6dULL;
	uint64_t v2 = k0 ^ 0x6c7967656e657261ULL;
	uint64_t v3 = k1 ^ 0x7465646279746573ULL;

	while (src_sz >= 8) {
#ifdef UNALIGNED_OK
		uint64_t mi = _le64toh(*in);
		in += 1;
#else
		uint64_t mi;
		memcpy(&mi, in, 8);
		mi = _le64toh(mi);
		in += 8;
#endif
		src_sz -= 8;
		v3 ^= mi;
		DOUBLE_ROUND(v0,v1,v2,v3);
		v0 ^= mi;
	}

	t = 0; pt = (uint8_t*)&t; m = (uint8_t*)in;
	switch (src_sz) {
	case 7: pt[6] = m[6];
	case 6: pt[5] = m[5];
	case 5: pt[4] = m[4];
#ifdef UNALIGNED_OK
	case 4: *((uint32_t*)&pt[0]) = *((uint32_t*)&m[0]); break;
#else
	case 4: pt[3] = m[3];
#endif
	case 3: pt[2] = m[2];
	case 2: pt[1] = m[1];
	case 1: pt[0] = m[0];
	}
	b |= _le64toh(t);

	v3 ^= b;
	DOUBLE_ROUND(v0,v1,v2,v3);
	v0 ^= b; v2 ^= 0xff;
	DOUBLE_ROUND(v0,v1,v2,v3);
	DOUBLE_ROUND(v0,v1,v2,v3);
	return (v0 ^ v1) ^ (v2 ^ v3);
}
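
All three SipHash examples also call _le64toh, which the excerpts leave undefined. A portable fallback sketch (hypothetical; real builds typically map it to le64toh() from <endian.h>, OSSwapLittleToHostInt64() on macOS, or a no-op on known little-endian targets):

#include <stdint.h>

/* interpret a 64-bit value loaded from little-endian bytes in host order */
static uint64_t le64toh_fallback(uint64_t x)
{
	const union { uint16_t u16; uint8_t u8; } probe = { 1 };
	if (probe.u8)	/* little-endian host: nothing to do */
		return x;
	return ((x & 0x00000000000000FFULL) << 56) |
	       ((x & 0x000000000000FF00ULL) << 40) |
	       ((x & 0x0000000000FF0000ULL) << 24) |
	       ((x & 0x00000000FF000000ULL) <<  8) |
	       ((x & 0x000000FF00000000ULL) >>  8) |
	       ((x & 0x0000FF0000000000ULL) >> 24) |
	       ((x & 0x00FF000000000000ULL) >> 40) |
	       ((x & 0xFF00000000000000ULL) >> 56);
}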
Example #6
extern_c void
crandom_chacha_expand(u_int64_t iv,
                      u_int64_t ctr,
                      int nr,
                      int output_size,
                      const unsigned char *key_,
                      unsigned char *output_) {
# if MIGHT_HAVE_SSE2
  if (HAVE(SSE2)) {
    ssereg *key = (ssereg *)key_;
    ssereg *output = (ssereg *)output_;
                     
    ssereg a1 = key[0], a2 = a1, aa = a1,
           b1 = key[1], b2 = b1, bb = b1,
           c1 = {iv, ctr}, c2 = {iv, ctr+1}, cc = c1,
           d1 = {0x3320646e61707865ull, 0x6b20657479622d32ull}, d2 = d1, dd = d1,
           p = {0, 1};
     
    int i,r;
#   if (NEED_XOP)
      if (HAVE(XOP)) {
        for (i=0; i<output_size; i+=128) {
          for (r=nr; r>0; r-=2)
            DOUBLE_ROUND(quarter_round_xop);
          OUTPUT_FUNCTION;
        }
        return;
      }
#   endif
#   if (NEED_SSSE3)
      if (HAVE(SSSE3)) {
        for (i=0; i<output_size; i+=128) {
          for (r=nr; r>0; r-=2)
            DOUBLE_ROUND(quarter_round_ssse3);
          OUTPUT_FUNCTION;
        }
        return;
      }
#   endif
#   if (NEED_SSE2)
      if (HAVE(SSE2)) {
        for (i=0; i<output_size; i+=128) {
          for (r=nr; r>0; r-=2)
            DOUBLE_ROUND(quarter_round_sse2);
          OUTPUT_FUNCTION;
        }
        return;
      }
#   endif
  }
# endif

# if NEED_CONV
  {
    const u_int32_t *key = (const u_int32_t *)key_;
    u_int32_t
      x[16],
      input[16] = {
        key[0], key[1], key[2], key[3],
        key[4], key[5], key[6], key[7],
        iv, iv>>32, ctr, ctr>>32,
        0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
      },
      *output = (u_int32_t *)output_;
    int i, r;
  
    for (i=0; i<output_size; i+= 64) {
      for (r=0; r<16; r++) {
        x[r] = input[r];
      }
      for (r=nr; r>0; r-=2) {
        quarter_round(&x[0], &x[4],  &x[8], &x[12]);
        quarter_round(&x[1], &x[5],  &x[9], &x[13]);
        quarter_round(&x[2], &x[6], &x[10], &x[14]);
        quarter_round(&x[3], &x[7], &x[11], &x[15]);
        
        quarter_round(&x[0], &x[5], &x[10], &x[15]);
        quarter_round(&x[1], &x[6], &x[11], &x[12]);
        quarter_round(&x[2], &x[7],  &x[8], &x[13]);
        quarter_round(&x[3], &x[4],  &x[9], &x[14]);
      }
      for (r=0; r<16; r++) {
        output[r] = x[r] + input[r];
      }
      
      output += 16;
      /* step the 64-bit block counter; its low word is input[10], matching
         the ctr, ctr+1 stepping of the SSE2 path above */
      input[10] ++;
      if (!input[10]) input[11]++;
    }
  }
# endif
}
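
The scalar path's quarter_round is not shown in the excerpt; below is a sketch of the standard ChaCha quarter round it presumably implements, with the pointer signature matching the call sites above:

#define ROTL32(x, n) (((x) << (n)) | ((x) >> (32 - (n))))

/* standard ChaCha quarter round: add, xor, rotate by 16/12/8/7 */
static void quarter_round(u_int32_t *a, u_int32_t *b, u_int32_t *c, u_int32_t *d)
{
  *a += *b; *d ^= *a; *d = ROTL32(*d, 16);
  *c += *d; *b ^= *c; *b = ROTL32(*b, 12);
  *a += *b; *d ^= *a; *d = ROTL32(*d, 8);
  *c += *d; *b ^= *c; *b = ROTL32(*b, 7);
}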