Esempio n. 1
0
void ChaCha_Policy<R>::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
{
	word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;

	while (iterationCount--)
	{
		x0 = m_state[0];	x1 = m_state[1];	x2 = m_state[2];	x3 = m_state[3];
		x4 = m_state[4];	x5 = m_state[5];	x6 = m_state[6];	x7 = m_state[7];
		x8 = m_state[8];	x9 = m_state[9];	x10 = m_state[10];	x11 = m_state[11];
		x12 = m_state[12];	x13 = m_state[13];	x14 = m_state[14];	x15 = m_state[15];

		for (int i = static_cast<int>(ROUNDS); i > 0; i -= 2)
		{
			CHACHA_QUARTER_ROUND(x0, x4,  x8, x12);
			CHACHA_QUARTER_ROUND(x1, x5,  x9, x13);
			CHACHA_QUARTER_ROUND(x2, x6, x10, x14);
			CHACHA_QUARTER_ROUND(x3, x7, x11, x15);

			CHACHA_QUARTER_ROUND(x0, x5, x10, x15);
			CHACHA_QUARTER_ROUND(x1, x6, x11, x12);
			CHACHA_QUARTER_ROUND(x2, x7,  x8, x13);
			CHACHA_QUARTER_ROUND(x3, x4,  x9, x14);
		}

		#undef CHACHA_OUTPUT
		#define CHACHA_OUTPUT(x){\
			CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, x0 + m_state[0]);\
			CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, x1 + m_state[1]);\
			CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, x2 + m_state[2]);\
			CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, x3 + m_state[3]);\
			CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 4, x4 + m_state[4]);\
			CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 5, x5 + m_state[5]);\
			CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 6, x6 + m_state[6]);\
			CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 7, x7 + m_state[7]);\
			CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 8, x8 + m_state[8]);\
			CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 9, x9 + m_state[9]);\
			CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 10, x10 + m_state[10]);\
			CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 11, x11 + m_state[11]);\
			CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 12, x12 + m_state[12]);\
			CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 13, x13 + m_state[13]);\
			CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 14, x14 + m_state[14]);\
			CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 15, x15 + m_state[15]);}

#ifndef CRYPTOPP_DOXYGEN_PROCESSING
		CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(CHACHA_OUTPUT, BYTES_PER_ITERATION);
#endif

		++m_state[12];
		m_state[13] += static_cast<word32>(m_state[12] == 0);
	}
}
Esempio n. 2
0
void ChaCha::chacha(byte output[64], const u32bit input[16])
   {
   u32bit x00 = input[ 0], x01 = input[ 1], x02 = input[ 2], x03 = input[ 3],
          x04 = input[ 4], x05 = input[ 5], x06 = input[ 6], x07 = input[ 7],
          x08 = input[ 8], x09 = input[ 9], x10 = input[10], x11 = input[11],
          x12 = input[12], x13 = input[13], x14 = input[14], x15 = input[15];

#define CHACHA_QUARTER_ROUND(a, b, c, d)   \
   do {                                    \
   a += b; d ^= a; d = rotate_left(d, 16); \
   c += d; b ^= c; b = rotate_left(b, 12); \
   a += b; d ^= a; d = rotate_left(d, 8);  \
   c += d; b ^= c; b = rotate_left(b, 7);  \
   } while(0)

   for(size_t i = 0; i != 10; ++i)
      {
      CHACHA_QUARTER_ROUND(x00, x04, x08, x12);
      CHACHA_QUARTER_ROUND(x01, x05, x09, x13);
      CHACHA_QUARTER_ROUND(x02, x06, x10, x14);
      CHACHA_QUARTER_ROUND(x03, x07, x11, x15);

      CHACHA_QUARTER_ROUND(x00, x05, x10, x15);
      CHACHA_QUARTER_ROUND(x01, x06, x11, x12);
      CHACHA_QUARTER_ROUND(x02, x07, x08, x13);
      CHACHA_QUARTER_ROUND(x03, x04, x09, x14);
      }

#undef CHACHA_QUARTER_ROUND

   store_le(x00 + input[ 0], output + 4 *  0);
   store_le(x01 + input[ 1], output + 4 *  1);
   store_le(x02 + input[ 2], output + 4 *  2);
   store_le(x03 + input[ 3], output + 4 *  3);
   store_le(x04 + input[ 4], output + 4 *  4);
   store_le(x05 + input[ 5], output + 4 *  5);
   store_le(x06 + input[ 6], output + 4 *  6);
   store_le(x07 + input[ 7], output + 4 *  7);
   store_le(x08 + input[ 8], output + 4 *  8);
   store_le(x09 + input[ 9], output + 4 *  9);
   store_le(x10 + input[10], output + 4 * 10);
   store_le(x11 + input[11], output + 4 * 11);
   store_le(x12 + input[12], output + 4 * 12);
   store_le(x13 + input[13], output + 4 * 13);
   store_le(x14 + input[14], output + 4 * 14);
   store_le(x15 + input[15], output + 4 * 15);
   }
Esempio n. 3
0
//static
void ChaCha::chacha_x4(byte output[64*4], u32bit input[16], size_t rounds)
   {
   BOTAN_ASSERT(rounds % 2 == 0, "Valid rounds");

#if defined(BOTAN_HAS_CHACHA_SSE2)
   if(CPUID::has_sse2())
      {
      return ChaCha::chacha_sse2_x4(output, input, rounds);
      }
#endif

   // TODO interleave rounds
   for(size_t i = 0; i != 4; ++i)
      {
      u32bit x00 = input[ 0], x01 = input[ 1], x02 = input[ 2], x03 = input[ 3],
             x04 = input[ 4], x05 = input[ 5], x06 = input[ 6], x07 = input[ 7],
             x08 = input[ 8], x09 = input[ 9], x10 = input[10], x11 = input[11],
             x12 = input[12], x13 = input[13], x14 = input[14], x15 = input[15];

#define CHACHA_QUARTER_ROUND(a, b, c, d)        \
      do {                                      \
      a += b; d ^= a; d = rotate_left(d, 16);   \
      c += d; b ^= c; b = rotate_left(b, 12);   \
      a += b; d ^= a; d = rotate_left(d, 8);    \
      c += d; b ^= c; b = rotate_left(b, 7);    \
      } while(0)

      for(size_t r = 0; r != rounds / 2; ++r)
         {
         CHACHA_QUARTER_ROUND(x00, x04, x08, x12);
         CHACHA_QUARTER_ROUND(x01, x05, x09, x13);
         CHACHA_QUARTER_ROUND(x02, x06, x10, x14);
         CHACHA_QUARTER_ROUND(x03, x07, x11, x15);

         CHACHA_QUARTER_ROUND(x00, x05, x10, x15);
         CHACHA_QUARTER_ROUND(x01, x06, x11, x12);
         CHACHA_QUARTER_ROUND(x02, x07, x08, x13);
         CHACHA_QUARTER_ROUND(x03, x04, x09, x14);
         }

#undef CHACHA_QUARTER_ROUND

      x00 += input[0];
      x01 += input[1];
      x02 += input[2];
      x03 += input[3];
      x04 += input[4];
      x05 += input[5];
      x06 += input[6];
      x07 += input[7];
      x08 += input[8];
      x09 += input[9];
      x10 += input[10];
      x11 += input[11];
      x12 += input[12];
      x13 += input[13];
      x14 += input[14];
      x15 += input[15];

      store_le(x00, output + 64 * i + 4 *  0);
      store_le(x01, output + 64 * i + 4 *  1);
      store_le(x02, output + 64 * i + 4 *  2);
      store_le(x03, output + 64 * i + 4 *  3);
      store_le(x04, output + 64 * i + 4 *  4);
      store_le(x05, output + 64 * i + 4 *  5);
      store_le(x06, output + 64 * i + 4 *  6);
      store_le(x07, output + 64 * i + 4 *  7);
      store_le(x08, output + 64 * i + 4 *  8);
      store_le(x09, output + 64 * i + 4 *  9);
      store_le(x10, output + 64 * i + 4 * 10);
      store_le(x11, output + 64 * i + 4 * 11);
      store_le(x12, output + 64 * i + 4 * 12);
      store_le(x13, output + 64 * i + 4 * 13);
      store_le(x14, output + 64 * i + 4 * 14);
      store_le(x15, output + 64 * i + 4 * 15);

      input[12]++;
      input[13] += input[12] < i; // carry?
      }
   }
Esempio n. 4
0
// OperateKeystream always produces a key stream. The key stream is written
// to output. Optionally a message may be supplied to xor with the key stream.
// The message is input, and output = output ^ input.
void ChaCha_Policy::OperateKeystream(KeystreamOperation operation,
        byte *output, const byte *input, size_t iterationCount)
{
    do
    {
#if (CRYPTOPP_AVX2_AVAILABLE)
        if (HasAVX2())
        {
            while (iterationCount >= 8 && MultiBlockSafe(8))
            {
                const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
                ChaCha_OperateKeystream_AVX2(m_state, xorInput ? input : NULLPTR, output, m_rounds);

                // MultiBlockSafe avoids overflow on the counter words
                m_state[12] += 8;
                //if (m_state[12] < 8)
                //    m_state[13]++;

                input += (!!xorInput) * 8 * BYTES_PER_ITERATION;
                output += 8 * BYTES_PER_ITERATION;
                iterationCount -= 8;
            }
        }
#endif

#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE)
        if (HasSSE2())
        {
            while (iterationCount >= 4 && MultiBlockSafe(4))
            {
                const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
                ChaCha_OperateKeystream_SSE2(m_state, xorInput ? input : NULLPTR, output, m_rounds);

                // MultiBlockSafe avoids overflow on the counter words
                m_state[12] += 4;
                //if (m_state[12] < 4)
                //    m_state[13]++;

                input += (!!xorInput)*4*BYTES_PER_ITERATION;
                output += 4*BYTES_PER_ITERATION;
                iterationCount -= 4;
            }
        }
#endif

#if (CRYPTOPP_ARM_NEON_AVAILABLE)
        if (HasNEON())
        {
            while (iterationCount >= 4 && MultiBlockSafe(4))
            {
                const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
                ChaCha_OperateKeystream_NEON(m_state, xorInput ? input : NULLPTR, output, m_rounds);

                // MultiBlockSafe avoids overflow on the counter words
                m_state[12] += 4;
                //if (m_state[12] < 4)
                //    m_state[13]++;

                input += (!!xorInput)*4*BYTES_PER_ITERATION;
                output += 4*BYTES_PER_ITERATION;
                iterationCount -= 4;
            }
        }
#endif

#if (CRYPTOPP_ALTIVEC_AVAILABLE)
        if (HasAltivec())
        {
            while (iterationCount >= 4 && MultiBlockSafe(4))
            {
                const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
                ChaCha_OperateKeystream_POWER7(m_state, xorInput ? input : NULLPTR, output, m_rounds);

                // MultiBlockSafe avoids overflow on the counter words
                m_state[12] += 4;
                //if (m_state[12] < 4)
                //    m_state[13]++;

                input += (!!xorInput)*4*BYTES_PER_ITERATION;
                output += 4*BYTES_PER_ITERATION;
                iterationCount -= 4;
            }
        }
#endif

        if (iterationCount)
        {
            word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;

            x0 = m_state[0];    x1 = m_state[1];    x2 = m_state[2];    x3 = m_state[3];
            x4 = m_state[4];    x5 = m_state[5];    x6 = m_state[6];    x7 = m_state[7];
            x8 = m_state[8];    x9 = m_state[9];    x10 = m_state[10];  x11 = m_state[11];
            x12 = m_state[12];  x13 = m_state[13];  x14 = m_state[14];  x15 = m_state[15];

            for (int i = static_cast<int>(m_rounds); i > 0; i -= 2)
            {
                CHACHA_QUARTER_ROUND(x0, x4,  x8, x12);
                CHACHA_QUARTER_ROUND(x1, x5,  x9, x13);
                CHACHA_QUARTER_ROUND(x2, x6, x10, x14);
                CHACHA_QUARTER_ROUND(x3, x7, x11, x15);

                CHACHA_QUARTER_ROUND(x0, x5, x10, x15);
                CHACHA_QUARTER_ROUND(x1, x6, x11, x12);
                CHACHA_QUARTER_ROUND(x2, x7,  x8, x13);
                CHACHA_QUARTER_ROUND(x3, x4,  x9, x14);
            }

            CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(CHACHA_OUTPUT, BYTES_PER_ITERATION);

            if (++m_state[12] == 0)
                m_state[13]++;
        }

    // We may re-enter a SIMD keystream operation from here.
    } while (iterationCount--);
}