// Fold the bytes [s, s+n) into the running CRC-32C value `c` using the ARMv8
// CRC extension instructions.
//   s - input buffer (any alignment)
//   n - number of bytes
//   c - CRC state, updated in place
// Strategy: consume bytes one at a time until s is word32-aligned, process the
// aligned bulk a word at a time, then consume any remaining tail bytes.
void CRC32C_Update_ARMV8(const byte *s, size_t n, word32& c)
{
    // Align to a word32 boundary so the word loop performs aligned loads.
    for(; !IsAligned<word32>(s) && n > 0; s++, n--)
        c = __crc32cb(c, *s);

    // Fix: was "n > 4", which pushed an exactly-4-byte tail through the byte
    // loop below. "n >= 4" lets the word instruction consume it; the CRC over
    // the byte stream is identical either way, this just saves three steps.
    for(; n >= 4; s+=4, n-=4)
        c = __crc32cw(c, *(const word32 *)(void*)s);

    // Tail: fewer than 4 bytes remain.
    for(; n > 0; s++, n--)
        c = __crc32cb(c, *s);
}
void CRC32C::Update(const byte *s, size_t n) { #if CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE if (HasSSE4()) { for(; !IsAligned<word32>(s) && n > 0; s++, n--) m_crc = _mm_crc32_u8(m_crc, *s); for(; n > 4; s+=4, n-=4) m_crc = _mm_crc32_u32(m_crc, *(const word32 *)(void*)s); for(; n > 0; s++, n--) m_crc = _mm_crc32_u8(m_crc, *s); return; } #elif (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE) if (HasCRC32()) { for(; !IsAligned<word32>(s) && n > 0; s++, n--) m_crc = __crc32cb(m_crc, *s); for(; n > 4; s+=4, n-=4) m_crc = __crc32cw(m_crc, *(const word32 *)(void*)s); for(; n > 0; s++, n--) m_crc = __crc32cb(m_crc, *s); return; } #endif word32 crc = m_crc; for(; !IsAligned<word32>(s) && n > 0; n--) crc = m_tab[CRC32_INDEX(crc) ^ *s++] ^ CRC32_SHIFTED(crc); while (n >= 4) { crc ^= *(const word32 *)(void*)s; crc = m_tab[CRC32_INDEX(crc)] ^ CRC32_SHIFTED(crc); crc = m_tab[CRC32_INDEX(crc)] ^ CRC32_SHIFTED(crc); crc = m_tab[CRC32_INDEX(crc)] ^ CRC32_SHIFTED(crc); crc = m_tab[CRC32_INDEX(crc)] ^ CRC32_SHIFTED(crc); n -= 4; s += 4; } while (n--) crc = m_tab[CRC32_INDEX(crc) ^ *s++] ^ CRC32_SHIFTED(crc); m_crc = crc; }
// Runtime probe for the ARMv8 CRC32/CRC32C instructions. Executes one of each
// instruction form inside a fault guard: if the CPU lacks the extension, the
// resulting illegal-instruction fault is caught and the probe returns false.
// Returns true only when all six instruction forms execute without faulting.
bool CPU_ProbeCRC32()
{
#if defined(CRYPTOPP_NO_CPU_FEATURE_PROBES)
    // Probing disabled at build time.
    return false;
#elif (CRYPTOPP_ARM_CRC32_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    // MSVC-style path: an illegal instruction raises a structured exception
    // which __try/__except can catch.
    volatile bool result = true;
    __try
    {
        // Touch every CRC32 form: word/half/byte, plain and Castagnoli (c*).
        word32 w=0, x=1; word16 y=2; byte z=3;
        w = __crc32w(w,x);
        w = __crc32h(w,y); w = __crc32b(w,z);
        w = __crc32cw(w,x);
        w = __crc32ch(w,y); w = __crc32cb(w,z);
        result = !!w;
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
#else
    // POSIX path: install a SIGILL handler and longjmp back out if an
    // instruction faults.
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandler);
    if (oldHandler == SIG_ERR)
        return false;

    // Save the current signal mask so it can be restored after a longjmp.
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULLPTR, (sigset_t*)&oldMask))
        return false;

    if (setjmp(s_jmpSIGILL))
        result = false;   // reached via longjmp from the SIGILL handler
    else
    {
        word32 w=0, x=1; word16 y=2; byte z=3;
        w = __crc32w(w,x);
        w = __crc32h(w,y); w = __crc32b(w,z);
        w = __crc32cw(w,x);
        w = __crc32ch(w,y); w = __crc32cb(w,z);

        // Hack... GCC optimizes away the code and returns true
        result = !!w;
    }

    // Restore the saved mask and the previous SIGILL handler before returning.
    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULLPTR);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif // CRYPTOPP_ARM_CRC32_AVAILABLE
}
// ARM-LABEL: test_crc32cw // AArch32: call i32 @llvm.arm.crc32cw // AArch64: call i32 @llvm.aarch64.crc32cw uint32_t test_crc32cw(uint32_t a, uint32_t b) { return __crc32cw(a, b); }
/* Thin wrapper exercising the ACLE CRC32C word builtin: feeds the two
   32-bit operands straight to __crc32cw and hands back its result. */
uint32_t
test_crc32cw (uint32_t arg0, uint32_t arg1)
{
  uint32_t folded = __crc32cw (arg0, arg1);

  return folded;
}
/*
 * Function to calculate reflected crc with PMULL Instruction
 * crc done "by 3" for fixed input block size of 1024 bytes
 *
 * Each 1024-byte block is split across three CRC streams (crc0/crc1/crc2)
 * computed in parallel to hide the crc instruction latency; crc0 and crc1
 * are then folded into the crc2 stream with carry-less multiplies (vmull_p64)
 * by the shift constants k1/k2 before the next block.
 *
 * NOTE(review): CRC32C7X3X8 is a macro defined elsewhere; from the pointer
 * arithmetic (6 calls then data += 42*3*8) each call appears to consume
 * 7 uint64 per stream x 3 streams — confirm against its definition.
 * NOTE(review): the uint64/uint32/uint16 loads assume unaligned access is
 * permitted; on AArch64 this is normally true — verify for the target.
 */
uint32_t crc32c_arm64(unsigned char const *data, unsigned long length)
{
	/* Signed so the "subtract, test >= 0" loop bounds below work. */
	signed long len = length;
	/* Seeded with ~0; no final inversion is applied here — presumably the
	   caller handles CRC-32C pre/post-conditioning. TODO confirm. */
	uint32_t crc = ~0;
	uint32_t crc0, crc1, crc2;

	/* Load two consts: K1 and K2 */
	/* NOTE(review): k1/k2 look like the x^(D*8) mod P folding constants for
	   the stream offsets — verify against the generator used. */
	const poly64_t k1 = 0xe417f38a, k2 = 0x8f158014;
	uint64_t t0, t1;

	/* Main loop: one full 1024-byte block per iteration. */
	while ((len -= 1024) >= 0) {
		/* Do first 8 bytes here for better pipelining */
		crc0 = __crc32cd(crc, *(const uint64_t *)data);
		crc1 = 0;
		crc2 = 0;
		data += sizeof(uint64_t);

		/* Process block inline
		   Process crc0 last to avoid dependency with above */
		CRC32C7X3X8(0);
		CRC32C7X3X8(1);
		CRC32C7X3X8(2);
		CRC32C7X3X8(3);
		CRC32C7X3X8(4);
		CRC32C7X3X8(5);

		/* 42*3 uint64s (1008 bytes) consumed by the six macro calls. */
		data += 42*3*sizeof(uint64_t);

		/* Merge crc0 and crc1 into crc2
		   crc1 multiply by K2
		   crc0 multiply by K1 */
		t1 = (uint64_t)vmull_p64(crc1, k2);
		t0 = (uint64_t)vmull_p64(crc0, k1);
		/* Last 8 bytes of the block go through the crc2 stream, then the
		   folded crc0/crc1 products are CRC-reduced and xor-merged in. */
		crc = __crc32cd(crc2, *(const uint64_t *)data);
		crc1 = __crc32cd(0, t1);
		crc ^= crc1;
		crc0 = __crc32cd(0, t0);
		crc ^= crc0;

		data += sizeof(uint64_t);
	}

	/* len went negative above; adding 1024 back yields the remaining byte
	   count. Zero remainder: done. */
	if (!(len += 1024))
		return crc;

	/* Remaining full 8-byte words. Afterwards len is negative, but its low
	   three bits still equal (remaining bytes) & 7, which the tail below
	   relies on. */
	while ((len -= sizeof(uint64_t)) >= 0) {
		crc = __crc32cd(crc, *(const uint64_t *)data);
		data += sizeof(uint64_t);
	}

	/* The following is more efficient than the straight loop */
	/* Tail of 0..7 bytes handled by testing bits 2, 1, 0 of len. */
	if (len & sizeof(uint32_t)) {
		crc = __crc32cw(crc, *(const uint32_t *)data);
		data += sizeof(uint32_t);
	}
	if (len & sizeof(uint16_t)) {
		crc = __crc32ch(crc, *(const uint16_t *)data);
		data += sizeof(uint16_t);
	}
	if (len & sizeof(uint8_t))
		crc = __crc32cb(crc, *(const uint8_t *)data);

	return crc;
}