std::string ChaCha_Policy::AlgorithmProvider() const { #if (CRYPTOPP_AVX2_AVAILABLE) if (HasAVX2()) return "AVX2"; else #endif #if (CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE) if (HasSSE2()) return "SSE2"; else #endif #if (CRYPTOPP_ARM_NEON_AVAILABLE) if (HasNEON()) return "NEON"; else #endif #if (CRYPTOPP_POWER7_AVAILABLE) if (HasPower7()) return "Power7"; else #endif #if (CRYPTOPP_ALTIVEC_AVAILABLE) if (HasAltivec()) return "Altivec"; else #endif return "C++"; }
unsigned int ChaCha_Policy::GetAlignment() const { #if (CRYPTOPP_AVX2_AVAILABLE) if (HasAVX2()) return 16; else #endif #if (CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE) if (HasSSE2()) return 16; else #endif #if (CRYPTOPP_ALTIVEC_AVAILABLE) if (HasAltivec()) return 16; else #endif return GetAlignmentOf<word32>(); }
// Builds a one-line, human-readable summary of the host CPU(s).
//
// The summary consists of, in order:
//   - the architecture family name selected from GetFamily()
//     (families not listed in the switch contribute no text),
//   - " x <count>" when GetCount() reports more than one CPU,
//   - " with" followed by the extensions whose Has*() probe is true
//     (MMX, SSE, SSE2, 3DNow, Altivec), if any.
//
// Returns "no host CPU information available" when GetCount() is zero.
// Example result shape: "IA32 x 2 with MMX SSE SSE2".
nglString nglCPUInfo::Dump()
{
  nglString text, buffer;
  const uint count = GetCount();

  if (count == 0)
  {
    text = _T("no host CPU information available");
    return text;
  }

  switch (GetFamily())
  {
    case eUnknown: text += _T("unknown"); break;
    case eIA32   : text += _T("IA32"); break;
    case eIA64   : text += _T("IA64"); break;
    case ePPC    : text += _T("PPC"); break;
    case eAlpha  : text += _T("Alpha"); break;
    case eMIPS   : text += _T("MIPS"); break;
  }

  if (count > 1)
  {
    buffer.Format(_T(" x %d"), count);
    // Append the formatted text. Appending the format string itself would
    // leave a literal "%d" in the summary instead of the CPU count.
    text += buffer;
  }

  // buffer is reused here: it now holds only the extension list, which is
  // empty when no probe reports true.
  buffer.Format(_T("%s%s%s%s%s"),
    HasMMX()     ? _T(" MMX")     : _T(""),
    HasSSE()     ? _T(" SSE")     : _T(""),
    HasSSE2()    ? _T(" SSE2")    : _T(""),
    Has3DNow()   ? _T(" 3DNow")   : _T(""),
    HasAltivec() ? _T(" Altivec") : _T(""));

  if (buffer.GetLength())
  {
    text += _T(" with");
    text += buffer;
  }

  return text;
}
unsigned int ChaCha_Policy::GetOptimalBlockSize() const { #if (CRYPTOPP_AVX2_AVAILABLE) if (HasAVX2()) return 8 * BYTES_PER_ITERATION; else #endif #if (CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE) if (HasSSE2()) return 4*BYTES_PER_ITERATION; else #endif #if (CRYPTOPP_ARM_NEON_AVAILABLE) if (HasNEON()) return 4*BYTES_PER_ITERATION; else #endif #if (CRYPTOPP_ALTIVEC_AVAILABLE) if (HasAltivec()) return 4*BYTES_PER_ITERATION; else #endif return BYTES_PER_ITERATION; }
// OperateKeystream always produces a key stream. The key stream is written
// to output. Optionally a message may be supplied to xor with the key stream.
// The message is input, and output = output ^ input.
//
// Parameters:
//   operation      - flags describing the request. When the INPUT_NULL bits
//                    are all set, no message is consumed and the SIMD
//                    kernels receive NULLPTR in place of input.
//   output         - destination buffer; advanced by BYTES_PER_ITERATION
//                    for every block produced.
//   input          - optional message; only advanced when it is consumed.
//   iterationCount - number of BYTES_PER_ITERATION-sized blocks to produce.
//
// Structure: each pass of the outer do/while first lets every compiled-in
// and runtime-detected SIMD kernel consume as many multi-block groups as it
// can (8 blocks for AVX2, 4 for SSE2/NEON/Altivec), then produces at most
// one block with the portable code. The outer loop then repeats, which gives
// the SIMD kernels another chance once the single block has been emitted.
void ChaCha_Policy::OperateKeystream(KeystreamOperation operation,
        byte *output, const byte *input, size_t iterationCount)
{
    do
    {
#if (CRYPTOPP_AVX2_AVAILABLE)
        if (HasAVX2())
        {
            // Eight blocks per call, as long as enough blocks remain and
            // MultiBlockSafe(8) permits it.
            while (iterationCount >= 8 && MultiBlockSafe(8))
            {
                // True unless all INPUT_NULL bits are set in operation.
                const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
                ChaCha_OperateKeystream_AVX2(m_state, xorInput ? input : NULLPTR, output, m_rounds);

                // MultiBlockSafe avoids overflow on the counter words
                m_state[12] += 8;

                //if (m_state[12] < 8)
                //    m_state[13]++;

                // (!!xorInput) is 0 or 1, so input only moves when consumed.
                input += (!!xorInput) * 8 * BYTES_PER_ITERATION;
                output += 8 * BYTES_PER_ITERATION;
                iterationCount -= 8;
            }
        }
#endif

#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE)
        if (HasSSE2())
        {
            // Four blocks per call; same bookkeeping as the AVX2 path.
            while (iterationCount >= 4 && MultiBlockSafe(4))
            {
                const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
                ChaCha_OperateKeystream_SSE2(m_state, xorInput ? input : NULLPTR, output, m_rounds);

                // MultiBlockSafe avoids overflow on the counter words
                m_state[12] += 4;

                //if (m_state[12] < 4)
                //    m_state[13]++;

                input += (!!xorInput)*4*BYTES_PER_ITERATION;
                output += 4*BYTES_PER_ITERATION;
                iterationCount -= 4;
            }
        }
#endif

#if (CRYPTOPP_ARM_NEON_AVAILABLE)
        if (HasNEON())
        {
            // Four blocks per call; same bookkeeping as the AVX2 path.
            while (iterationCount >= 4 && MultiBlockSafe(4))
            {
                const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
                ChaCha_OperateKeystream_NEON(m_state, xorInput ? input : NULLPTR, output, m_rounds);

                // MultiBlockSafe avoids overflow on the counter words
                m_state[12] += 4;

                //if (m_state[12] < 4)
                //    m_state[13]++;

                input += (!!xorInput)*4*BYTES_PER_ITERATION;
                output += 4*BYTES_PER_ITERATION;
                iterationCount -= 4;
            }
        }
#endif

#if (CRYPTOPP_ALTIVEC_AVAILABLE)
        if (HasAltivec())
        {
            // Four blocks per call. Note the kernel is the POWER7-named
            // routine even though the guard and probe are for Altivec.
            while (iterationCount >= 4 && MultiBlockSafe(4))
            {
                const bool xorInput = (operation & INPUT_NULL) != INPUT_NULL;
                ChaCha_OperateKeystream_POWER7(m_state, xorInput ? input : NULLPTR, output, m_rounds);

                // MultiBlockSafe avoids overflow on the counter words
                m_state[12] += 4;

                //if (m_state[12] < 4)
                //    m_state[13]++;

                input += (!!xorInput)*4*BYTES_PER_ITERATION;
                output += 4*BYTES_PER_ITERATION;
                iterationCount -= 4;
            }
        }
#endif

        // Portable path: produce exactly one block if any work remains.
        if (iterationCount)
        {
            // Working copy of the 16 state words. The names x0..x15 are
            // kept as-is; NOTE(review): CHACHA_OUTPUT is defined outside
            // this view and presumably refers to them by name - confirm
            // before renaming.
            word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;

            x0 = m_state[0];    x1 = m_state[1];    x2 = m_state[2];    x3 = m_state[3];
            x4 = m_state[4];    x5 = m_state[5];    x6 = m_state[6];    x7 = m_state[7];
            x8 = m_state[8];    x9 = m_state[9];    x10 = m_state[10];  x11 = m_state[11];
            x12 = m_state[12];  x13 = m_state[13];  x14 = m_state[14];  x15 = m_state[15];

            // The counter steps by two: each pass applies four quarter
            // rounds to the columns of the 4x4 word layout and then four
            // to its diagonals.
            for (int i = static_cast<int>(m_rounds); i > 0; i -= 2)
            {
                CHACHA_QUARTER_ROUND(x0, x4, x8, x12);
                CHACHA_QUARTER_ROUND(x1, x5, x9, x13);
                CHACHA_QUARTER_ROUND(x2, x6, x10, x14);
                CHACHA_QUARTER_ROUND(x3, x7, x11, x15);

                CHACHA_QUARTER_ROUND(x0, x5, x10, x15);
                CHACHA_QUARTER_ROUND(x1, x6, x11, x12);
                CHACHA_QUARTER_ROUND(x2, x7, x8, x13);
                CHACHA_QUARTER_ROUND(x3, x4, x9, x14);
            }

            // NOTE(review): macro defined outside this view; presumably it
            // emits the block according to `operation` and advances
            // output/input by BYTES_PER_ITERATION - confirm.
            CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(CHACHA_OUTPUT, BYTES_PER_ITERATION);

            // Advance the block counter; carry into m_state[13] when the
            // low counter word wraps to zero.
            if (++m_state[12] == 0)
                m_state[13]++;
        }

    // We may re-enter a SIMD keystream operation from here.
    // The post-decrement accounts for the single block produced by the
    // portable path. When iterationCount is already zero the test fails and
    // the loop ends; the unsigned wrap of the local copy is never observed.
    } while (iterationCount--);
}
bool TestAltivecOps() { std::cout << "\nTesting Altivec operations...\n\n"; if (HasAltivec() == false) { std::cout << "\nAltivec not available, skipping test." << std::endl; return true; } // These tests may seem superflous, but we really want to test the // Altivec/POWER4 implementation. That does not happen when POWER7 // or POWER8 is available because we use POWER7's unaligned loads // and stores with POWER8's AES, SHA, etc. These tests enage // Altivec/POWER4 without POWER7, like on an old PowerMac. //********** Unaligned loads and stores **********// bool pass1=true; CRYPTOPP_ALIGN_DATA(16) byte dest[20], src[20] = {23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4}; const byte st1[16] = {22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7}; const byte st2[16] = {21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6}; const byte st3[16] = {20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5}; VecStore(VecLoad(src), dest); pass1 = (0 == std::memcmp(src, dest, 16)) && pass1; CRYPTOPP_ASSERT(pass1); VecStore(VecLoad(src+1), dest+1); pass1 = (0 == std::memcmp(st1, dest+1, 16)) && pass1; CRYPTOPP_ASSERT(pass1); VecStore(VecLoad(src+2), dest+2); pass1 = (0 == std::memcmp(st2, dest+2, 16)) && pass1; CRYPTOPP_ASSERT(pass1); VecStore(VecLoad(src+3), dest+3); pass1 = (0 == std::memcmp(st3, dest+3, 16)) && pass1; CRYPTOPP_ASSERT(pass1); VecStoreBE(VecLoadBE(src), dest); pass1 = (0 == std::memcmp(src, dest, 16)) && pass1; CRYPTOPP_ASSERT(pass1); VecStoreBE(VecLoadBE(src+1), dest+1); pass1 = (0 == std::memcmp(st1, dest+1, 16)) && pass1; CRYPTOPP_ASSERT(pass1); VecStoreBE(VecLoadBE(src+2), dest+2); pass1 = (0 == std::memcmp(st2, dest+2, 16)) && pass1; CRYPTOPP_ASSERT(pass1); VecStoreBE(VecLoadBE(src+3), dest+3); pass1 = (0 == std::memcmp(st3, dest+3, 16)) && pass1; CRYPTOPP_ASSERT(pass1); #if (CRYPTOPP_LITTLE_ENDIAN) VecStore(VecLoadBE(src), dest); pass1 = (0 != std::memcmp(src, dest, 16)) && pass1; CRYPTOPP_ASSERT(pass1); VecStoreBE(VecLoad(src), dest); pass1 = (0 != std::memcmp(src, dest, 16)) 
&& pass1; CRYPTOPP_ASSERT(pass1); #endif if (!pass1) std::cout << "FAILED:"; else std::cout << "passed:"; std::cout << " Altivec loads and stores" << std::endl; //********** Shifts **********// bool pass2=true; uint8x16_p val = {0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff}; pass2 = (VecEqual(val, VecShiftLeftOctet<0>(val))) && pass2; CRYPTOPP_ASSERT(pass2); pass2 = (VecEqual(val, VecShiftRightOctet<0>(val))) && pass2; CRYPTOPP_ASSERT(pass2); uint8x16_p lsh1 = {0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0x00}; uint8x16_p rsh1 = {0x00,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff}; pass2 = (VecEqual(lsh1, VecShiftLeftOctet<1>(val))) && pass2; CRYPTOPP_ASSERT(pass2); pass2 = (VecEqual(rsh1, VecShiftRightOctet<1>(val))) && pass2; CRYPTOPP_ASSERT(pass2); uint8x16_p lsh15 = {0xff,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00}; uint8x16_p rsh15 = {0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0xff}; pass2 = (VecEqual(lsh15, VecShiftLeftOctet<15>(val))) && pass2; CRYPTOPP_ASSERT(pass2); pass2 = (VecEqual(rsh15, VecShiftRightOctet<15>(val))) && pass2; CRYPTOPP_ASSERT(pass2); uint8x16_p lsh16 = {0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00}; uint8x16_p rsh16 = {0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00}; pass2 = (VecEqual(lsh16, VecShiftLeftOctet<16>(val))) && pass2; CRYPTOPP_ASSERT(pass2); pass2 = (VecEqual(rsh16, VecShiftRightOctet<16>(val))) && pass2; CRYPTOPP_ASSERT(pass2); if (!pass2) std::cout << "FAILED:"; else std::cout << "passed:"; std::cout << " Altivec left and right shifts" << std::endl; //********** Extraction **********// bool pass3=true; const byte bex1[] = {0x1f,0x1e,0x1d,0x1c, 0x1b,0x1a,0x19,0x18, 0x17,0x16,0x15,0x14, 0x13,0x12,0x11,0x10}; const byte bex2[] = {0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 
0x17,0x16,0x15,0x14, 0x13,0x12,0x11,0x10}; const byte bex3[] = {0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00, 0x1f,0x1e,0x1d,0x1c, 0x1b,0x1a,0x19,0x18}; const uint8x16_p ex1 = (uint8x16_p)VecLoad(bex1); const uint8x16_p ex2 = (uint8x16_p)VecLoad(bex2); const uint8x16_p ex3 = (uint8x16_p)VecLoad(bex3); pass3 = VecEqual(ex2, VecGetLow(ex1)) && pass3; CRYPTOPP_ASSERT(pass3); pass3 = VecEqual(ex3, VecGetHigh(ex1)) && pass3; CRYPTOPP_ASSERT(pass3); uint8x16_p ex4 = VecShiftRightOctet<8>(VecShiftLeftOctet<8>(ex1)); pass3 = VecEqual(ex4, VecGetLow(ex1)) && pass3; CRYPTOPP_ASSERT(pass3); uint8x16_p ex5 = VecShiftRightOctet<8>(ex1); pass3 = VecEqual(ex5, VecGetHigh(ex1)) && pass3; CRYPTOPP_ASSERT(pass3); if (!pass3) std::cout << "FAILED:"; else std::cout << "passed:"; std::cout << " Altivec vector extraction" << std::endl; return pass1 && pass2 && pass3; }