void ChaCha_Policy<R>::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount) { word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; while (iterationCount--) { x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3]; x4 = m_state[4]; x5 = m_state[5]; x6 = m_state[6]; x7 = m_state[7]; x8 = m_state[8]; x9 = m_state[9]; x10 = m_state[10]; x11 = m_state[11]; x12 = m_state[12]; x13 = m_state[13]; x14 = m_state[14]; x15 = m_state[15]; for (int i = static_cast<int>(ROUNDS); i > 0; i -= 2) { CHACHA_QUARTER_ROUND(x0, x4, x8, x12); CHACHA_QUARTER_ROUND(x1, x5, x9, x13); CHACHA_QUARTER_ROUND(x2, x6, x10, x14); CHACHA_QUARTER_ROUND(x3, x7, x11, x15); CHACHA_QUARTER_ROUND(x0, x5, x10, x15); CHACHA_QUARTER_ROUND(x1, x6, x11, x12); CHACHA_QUARTER_ROUND(x2, x7, x8, x13); CHACHA_QUARTER_ROUND(x3, x4, x9, x14); } #undef CHACHA_OUTPUT #define CHACHA_OUTPUT(x){\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, x0 + m_state[0]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, x1 + m_state[1]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, x2 + m_state[2]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, x3 + m_state[3]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 4, x4 + m_state[4]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 5, x5 + m_state[5]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 6, x6 + m_state[6]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 7, x7 + m_state[7]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 8, x8 + m_state[8]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 9, x9 + m_state[9]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 10, x10 + m_state[10]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 11, x11 + m_state[11]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 12, x12 + m_state[12]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, 
LITTLE_ENDIAN_ORDER, 13, x13 + m_state[13]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 14, x14 + m_state[14]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 15, x15 + m_state[15]);} #ifndef CRYPTOPP_DOXYGEN_PROCESSING CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(CHACHA_OUTPUT, BYTES_PER_ITERATION); #endif ++m_state[12]; m_state[13] += static_cast<word32>(m_state[12] == 0); } }
// Generates iterationCount iterations of SEAL key stream.
//
// Each outer iteration seeds four working words (a, b, c, d) from
// m_outsideCounter and four words of m_R selected by m_insideCounter, mixes
// them through the table m_T, and then runs a 64-pass inner loop. Every inner
// pass emits four words (16 bytes) combined with m_S, so one outer iteration
// yields 64 * 4 * 32 = 8192 bits. After each outer iteration m_insideCounter
// is advanced; when it reaches m_iterationsPerCount it is reset to zero and
// m_outsideCounter is incremented.
//
// operation, output and input are not referenced by name in this body.
// NOTE(review): they are presumably consumed (and the pointers advanced) by
// the CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH macro, which is defined elsewhere.
void SEAL_Policy<B>::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
{
	word32 a, b, c, d, n1, n2, n3, n4;
	unsigned int p, q;

	for (size_t iteration = 0; iteration < iterationCount; ++iteration)
	{
		// Ttab(x) reads a word32 located x BYTES past the start of m_T. Every
		// offset used below is masked with 0x7fc (bits 2..10), i.e. it is a
		// multiple of 4 no larger than 2044.
		#define Ttab(x) *(word32 *)((byte *)m_T.begin()+x)

		// Seed the working words: the outside counter rotated right by
		// 0/8/16/24 bits, each XORed with one of the four m_R words that
		// belong to the current inside counter.
		a = m_outsideCounter ^ m_R[4*m_insideCounter];
		b = rotrFixed(m_outsideCounter, 8U) ^ m_R[4*m_insideCounter+1];
		c = rotrFixed(m_outsideCounter, 16U) ^ m_R[4*m_insideCounter+2];
		d = rotrFixed(m_outsideCounter, 24U) ^ m_R[4*m_insideCounter+3];

		// Two passes of the basic mixing step: look up the table entry chosen
		// by one word, add it to the next word, then rotate the first word
		// right by 9 bits. The step is applied a->b, b->c, c->d, d->a.
		for (unsigned int j=0; j<2; j++)
		{
			p = a & 0x7fc;
			b += Ttab(p);
			a = rotrFixed(a, 9U);

			p = b & 0x7fc;
			c += Ttab(p);
			b = rotrFixed(b, 9U);

			p = c & 0x7fc;
			d += Ttab(p);
			c = rotrFixed(c, 9U);

			p = d & 0x7fc;
			a += Ttab(p);
			d = rotrFixed(d, 9U);
		}

		// Snapshot of the working words at this point; these values are fed
		// back into a..d at the end of every inner pass below.
		n1 = d, n2 = b, n3 = a, n4 = c;

		// One further pass of the same four mixing steps.
		p = a & 0x7fc;
		b += Ttab(p);
		a = rotrFixed(a, 9U);

		p = b & 0x7fc;
		c += Ttab(p);
		b = rotrFixed(b, 9U);

		p = c & 0x7fc;
		d += Ttab(p);
		c = rotrFixed(c, 9U);

		p = d & 0x7fc;
		a += Ttab(p);
		d = rotrFixed(d, 9U);

		// generate 8192 bits
		for (unsigned int i=0; i<64; i++)
		{
			// Eight table-driven steps. p and q alternate as the table
			// offset; after the first two steps each new offset is the
			// previous one plus a working word, masked again with 0x7fc.
			// The combining operator alternates between += and ^=.
			p = a & 0x7fc;
			a = rotrFixed(a, 9U);
			b += Ttab(p);
			b ^= a;

			q = b & 0x7fc;
			b = rotrFixed(b, 9U);
			c ^= Ttab(q);
			c += b;

			p = (p+c) & 0x7fc;
			c = rotrFixed(c, 9U);
			d += Ttab(p);
			d ^= c;

			q = (q+d) & 0x7fc;
			d = rotrFixed(d, 9U);
			a ^= Ttab(q);
			a += d;

			p = (p+a) & 0x7fc;
			b ^= Ttab(p);
			a = rotrFixed(a, 9U);

			q = (q+b) & 0x7fc;
			c += Ttab(q);
			b = rotrFixed(b, 9U);

			p = (p+c) & 0x7fc;
			d ^= Ttab(p);
			c = rotrFixed(c, 9U);

			q = (q+d) & 0x7fc;
			d = rotrFixed(d, 9U);
			a += Ttab(q);

			// Output step handed to CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH: the four
			// words of this pass are b, c, d, a combined (+, ^, +, ^) with the
			// four m_S words for pass i, in the byte order given by B::ToEnum().
#define SEAL_OUTPUT(x)	\
	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 0, b + m_S[4*i+0]);\
	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 1, c ^ m_S[4*i+1]);\
	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 2, d + m_S[4*i+2]);\
	CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, B::ToEnum(), 3, a ^ m_S[4*i+3]);

			// 4*4 = 16 bytes are produced per inner pass.
			CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SEAL_OUTPUT, 4*4);

			// Feed the snapshot back in: odd passes use (n3, n4), even
			// passes use (n1, n2).
			if (i & 1)
			{
				a += n3;
				b += n4;
				c ^= n3;
				d ^= n4;
			}
			else
			{
				a += n1;
				b += n2;
				c ^= n1;
				d ^= n2;
			}
		}

		// Step the inside counter; roll over into the outside counter once
		// m_iterationsPerCount iterations have been produced.
		if (++m_insideCounter == m_iterationsPerCount)
		{
			++m_outsideCounter;
			m_insideCounter = 0;
		}
	}

	// Zero the working variables before returning.
	// NOTE(review): presumably intended to scrub key-stream-derived values;
	// these are plain stores to locals, so confirm the compiler keeps them.
	a = b = c = d = n1 = n2 = n3 = n4 = 0;
	p = q = 0;
}
// OperateKeystream always produces a key stream. The key stream is written // to output. Optionally a message may be supplied to xor with the key stream. // The message is input, and output = output ^ input. void ChaCha_Policy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount) { do { #if (CRYPTOPP_AVX2_AVAILABLE) if (HasAVX2()) { while (iterationCount >= 8 && MultiBlockSafe(8)) { const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL; ChaCha_OperateKeystream_AVX2(m_state, xorInput ? input : NULLPTR, output, m_rounds); // MultiBlockSafe avoids overflow on the counter words m_state[12] += 8; //if (m_state[12] < 8) // m_state[13]++; input += (!!xorInput) * 8 * BYTES_PER_ITERATION; output += 8 * BYTES_PER_ITERATION; iterationCount -= 8; } } #endif #if (CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE) if (HasSSE2()) { while (iterationCount >= 4 && MultiBlockSafe(4)) { const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL; ChaCha_OperateKeystream_SSE2(m_state, xorInput ? input : NULLPTR, output, m_rounds); // MultiBlockSafe avoids overflow on the counter words m_state[12] += 4; //if (m_state[12] < 4) // m_state[13]++; input += (!!xorInput)*4*BYTES_PER_ITERATION; output += 4*BYTES_PER_ITERATION; iterationCount -= 4; } } #endif #if (CRYPTOPP_ARM_NEON_AVAILABLE) if (HasNEON()) { while (iterationCount >= 4 && MultiBlockSafe(4)) { const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL; ChaCha_OperateKeystream_NEON(m_state, xorInput ? 
input : NULLPTR, output, m_rounds); // MultiBlockSafe avoids overflow on the counter words m_state[12] += 4; //if (m_state[12] < 4) // m_state[13]++; input += (!!xorInput)*4*BYTES_PER_ITERATION; output += 4*BYTES_PER_ITERATION; iterationCount -= 4; } } #endif #if (CRYPTOPP_ALTIVEC_AVAILABLE) if (HasAltivec()) { while (iterationCount >= 4 && MultiBlockSafe(4)) { const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL; ChaCha_OperateKeystream_POWER7(m_state, xorInput ? input : NULLPTR, output, m_rounds); // MultiBlockSafe avoids overflow on the counter words m_state[12] += 4; //if (m_state[12] < 4) // m_state[13]++; input += (!!xorInput)*4*BYTES_PER_ITERATION; output += 4*BYTES_PER_ITERATION; iterationCount -= 4; } } #endif if (iterationCount) { word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3]; x4 = m_state[4]; x5 = m_state[5]; x6 = m_state[6]; x7 = m_state[7]; x8 = m_state[8]; x9 = m_state[9]; x10 = m_state[10]; x11 = m_state[11]; x12 = m_state[12]; x13 = m_state[13]; x14 = m_state[14]; x15 = m_state[15]; for (int i = static_cast<int>(m_rounds); i > 0; i -= 2) { CHACHA_QUARTER_ROUND(x0, x4, x8, x12); CHACHA_QUARTER_ROUND(x1, x5, x9, x13); CHACHA_QUARTER_ROUND(x2, x6, x10, x14); CHACHA_QUARTER_ROUND(x3, x7, x11, x15); CHACHA_QUARTER_ROUND(x0, x5, x10, x15); CHACHA_QUARTER_ROUND(x1, x6, x11, x12); CHACHA_QUARTER_ROUND(x2, x7, x8, x13); CHACHA_QUARTER_ROUND(x3, x4, x9, x14); } CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(CHACHA_OUTPUT, BYTES_PER_ITERATION); if (++m_state[12] == 0) m_state[13]++; } // We may re-enter a SIMD keystream operation from here. } while (iterationCount--); }